[FFmpeg-devel] avdevice/v4l2: Switch to wrapped AVFrames and implement strides

Message ID 20230929-v4l2-strides-v1-1-26c10bf10726@asahilina.net
State New
Series [FFmpeg-devel] avdevice/v4l2: Switch to wrapped AVFrames and implement strides

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Asahi Lina Sept. 29, 2023, 7:52 a.m. UTC
V4L2 provides a line stride to the client for hardware that has
alignment requirements. rawvideo cannot represent this, so switch to
wrapped_avframe for raw video formats and calculate the plane strides
manually.

This is slightly messy because the existing helper APIs expect
dimensions and an alignment value, while v4l2 provides the stride of
plane 0 and the subsequent plane strides are implied, so we need to
open-code the logic to calculate the plane strides.

This makes vertical video work properly on Apple Macs with "1080p"
cameras, whose sensors are actually square and can support resolutions
like 1080x1920; these require stride padding to a multiple of 64 bytes.
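For a 1-byte-per-pixel plane at width 1080, for example, that means
padding each line from 1080 up to 1088 bytes.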

In principle, this could be extended to support the V4L2 multiplanar
API, though there seem to be practically no capture (not M2M) drivers
that support this, so it's not terribly useful right now.

Signed-off-by: Asahi Lina <lina@asahilina.net>
---
 libavdevice/v4l2-common.c |  68 +++++++++++------------
 libavdevice/v4l2.c        | 138 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 151 insertions(+), 55 deletions(-)


---
base-commit: b643af4acb471c3cb09b02afcc511498c9674347
change-id: 20230929-v4l2-strides-443dc65b5cb8

Thank you,
~~ Lina

Comments

rcombs Sept. 29, 2023, 7:56 a.m. UTC | #1
> On Sep 29, 2023, at 00:52, Asahi Lina via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> wrote:
> [...]

LGTM; I'll wait a week and apply assuming nobody has any issues with this.
Andreas Rheinhardt Sept. 29, 2023, 11:03 a.m. UTC | #2
Asahi Lina via ffmpeg-devel:
> [...]
> +static void v4l2_free_frame(void *opaque, uint8_t *data)
> +{
> +    AVFrame *frame = (AVFrame*)data;
> +
> +    av_frame_free(&frame);

You should include libavutil/frame.h for this; don't rely on implicit
inclusions via avcodec.h.
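
That is:

    #include "libavutil/frame.h"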

> +}
> +
>  static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
>  {
>      struct video_data *s = ctx->priv_data;
> +    struct AVBufferRef *avbuf = NULL;
> +    struct AVFrame *frame = NULL;
>      struct v4l2_buffer buf = {
>          .type   = V4L2_BUF_TYPE_VIDEO_CAPTURE,
>          .memory = V4L2_MEMORY_MMAP
> @@ -560,13 +573,13 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
>      /* Image is at s->buff_start[buf.index] */
>      if (atomic_load(&s->buffers_queued) == FFMAX(s->buffers / 8, 1)) {
>          /* when we start getting low on queued buffers, fall back on copying data */
> -        res = av_new_packet(pkt, buf.bytesused);
> -        if (res < 0) {
> -            av_log(ctx, AV_LOG_ERROR, "Error allocating a packet.\n");
> +        avbuf = av_buffer_alloc(buf.bytesused);

1. This is missing the padding. av_new_packet() exists for a reason.
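
For reference, a sketch of keeping the padded allocation while still
obtaining a buffer reference for the wrapped-frame path (hypothetical,
error handling condensed; the old pkt->buf would also have to be
released before pkt is repurposed for the wrapped frame):

    /* av_new_packet() allocates bytesused + AV_INPUT_BUFFER_PADDING_SIZE */
    res = av_new_packet(pkt, buf.bytesused);
    if (res < 0) {
        av_log(ctx, AV_LOG_ERROR, "Error allocating a packet.\n");
        enqueue_buffer(s, &buf);
        return res;
    }
    memcpy(pkt->data, s->buf_start[buf.index], buf.bytesused);
    /* the wrapped-frame path can then take its own reference */
    avbuf = av_buffer_ref(pkt->buf);
    if (!avbuf)
        return AVERROR(ENOMEM);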

> +        if (!avbuf) {
> +            av_log(ctx, AV_LOG_ERROR, "Error allocating a buffer.\n");
>              enqueue_buffer(s, &buf);
>              return res;
>          }
> -        memcpy(pkt->data, s->buf_start[buf.index], buf.bytesused);
> +        memcpy(avbuf->data, s->buf_start[buf.index], buf.bytesused);
>  
>          res = enqueue_buffer(s, &buf);
>          if (res) {
> @@ -576,9 +589,6 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
>      } else {
>          struct buff_data *buf_descriptor;
>  
> -        pkt->data     = s->buf_start[buf.index];
> -        pkt->size     = buf.bytesused;
> -
>          buf_descriptor = av_malloc(sizeof(struct buff_data));
>          if (!buf_descriptor) {
>              /* Something went wrong... Since av_malloc() failed, we cannot even
> @@ -592,19 +602,65 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
>          buf_descriptor->index = buf.index;
>          buf_descriptor->s     = s;
>  
> -        pkt->buf = av_buffer_create(pkt->data, pkt->size, mmap_release_buffer,
> -                                    buf_descriptor, 0);
> -        if (!pkt->buf) {
> +        avbuf = av_buffer_create(s->buf_start[buf.index], buf.bytesused,
> +                                 mmap_release_buffer, buf_descriptor, 0);
> +        if (!avbuf) {
>              av_log(ctx, AV_LOG_ERROR, "Failed to create a buffer\n");
>              enqueue_buffer(s, &buf);
>              av_freep(&buf_descriptor);
>              return AVERROR(ENOMEM);
>          }
>      }
> +
> +    if (ctx->video_codec_id == AV_CODEC_ID_WRAPPED_AVFRAME) {
> +        frame = av_frame_alloc();
> +
> +        if (!frame) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to create an AVFrame\n");
> +            goto err_free;
> +        }
> +
> +        frame->buf[0] = avbuf;

2. You are moving ownership of avbuf to the frame here, so on error the
av_frame_unref() in the err_free codepath will free avbuf, even though
it has already been freed by av_buffer_unref().
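
One stopgap alternative (hypothetical; the restructuring suggested
below avoids the extra reference entirely) is to give the frame its own
reference, so the local avbuf reference stays valid on every error path:

    frame->buf[0] = av_buffer_ref(avbuf);
    if (!frame->buf[0])
        goto err_free;              /* avbuf is still solely ours here */
    /* ... and av_buffer_unref(&avbuf) once the packet is fully set up */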

> +
> +        memcpy(frame->linesize, s->linesize, sizeof(s->linesize));
> +        res = av_image_fill_pointers(frame->data, s->pix_fmt, s->height,
> +                                     avbuf->data, frame->linesize);
> +        if (res < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to compute data pointers\n");
> +            goto err_free;
> +        }
> +
> +        frame->format  = s->pix_fmt;
> +        frame->width   = s->width;
> +        frame->height  = s->height;
> +
> +        pkt->buf = av_buffer_create((uint8_t*)frame, sizeof(*frame),
> +                                    &v4l2_free_frame, ctx, 0);

There is no need to use an opaque here at all, as v4l2_free_frame does
not use it; in fact, this buffer might outlive ctx, which is all the
more reason not to pass ctx as the opaque.
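
That is:

    pkt->buf = av_buffer_create((uint8_t*)frame, sizeof(*frame),
                                v4l2_free_frame, NULL, 0);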

> +        if (!pkt->buf) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to create an AVBuffer\n");
> +            goto err_free;
> +        }
> +
> +        pkt->data   = (uint8_t*)frame;
> +        pkt->size   = sizeof(*frame);
> +        pkt->flags |= AV_PKT_FLAG_TRUSTED;
> +        frame = NULL;
> +    } else {
> +        pkt->buf      = avbuf;
> +        pkt->data     = avbuf->data;
> +        pkt->size     = buf.bytesused;
> +        avbuf = NULL;
> +    }
> +
>      pkt->pts = buf_ts.tv_sec * INT64_C(1000000) + buf_ts.tv_usec;
>      convert_timestamp(ctx, &pkt->pts);
>  
>      return pkt->size;
> +
> +err_free:
> +    av_buffer_unref(&avbuf);
> +    av_frame_unref(frame);

3. This may call av_frame_unref(NULL) if the AVFrame could not be
allocated, which currently works but is not guaranteed to keep working
(I consider av_frame_unref(NULL) a programmer error that should not
exist; we should just let it crash). It also leaks the AVFrame if
av_image_fill_pointers() or av_buffer_create() fails.

To fix 2. and 3., you can proceed as follows: call
av_image_fill_pointers() before allocating the frame (storing the data
pointers on the stack), then allocate the frame, then wrap it into a
buffer. Only if wrapping it into a buffer fails do you free the frame;
no other error path needs to free it. You should also make the AVFrame*
local to the block where you allocate it.
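
A minimal sketch of that ordering (condensed and untested, not the
final patch):

    uint8_t *data[AV_NUM_DATA_POINTERS] = { NULL };
    int      linesize[AV_NUM_DATA_POINTERS];

    memcpy(linesize, s->linesize, sizeof(linesize));
    res = av_image_fill_pointers(data, s->pix_fmt, s->height,
                                 avbuf->data, linesize);
    if (res < 0) {
        av_buffer_unref(&avbuf);
        return res;
    }

    AVFrame *frame = av_frame_alloc();
    if (!frame) {
        av_buffer_unref(&avbuf);
        return AVERROR(ENOMEM);
    }
    frame->buf[0] = avbuf;                    /* frame now owns the buffer */
    memcpy(frame->data,     data,     sizeof(data));
    memcpy(frame->linesize, linesize, sizeof(linesize));
    frame->format = s->pix_fmt;
    frame->width  = s->width;
    frame->height = s->height;

    pkt->buf = av_buffer_create((uint8_t*)frame, sizeof(*frame),
                                v4l2_free_frame, NULL, 0);
    if (!pkt->buf) {
        av_frame_free(&frame);                /* also unrefs frame->buf[0] */
        return AVERROR(ENOMEM);
    }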

(This is not to say that I actually like the whole wrapped avframe hack.)

- Andreas
Anton Khirnov Oct. 2, 2023, 10:23 a.m. UTC | #3
Quoting Asahi Lina via ffmpeg-devel (2023-09-29 09:52:23)
> [...]

Wrapped AVFrames are an abominable hack that should be going away, not
spread to new places. The correct approach is to move v4l2 rawvideo to
a lavfi source filter; then none of these hacks are needed.
Paul B Mahol Oct. 2, 2023, 3:46 p.m. UTC | #4
On 10/2/23, Anton Khirnov <anton@khirnov.net> wrote:
> Quoting Asahi Lina via ffmpeg-devel (2023-09-29 09:52:23)
>> [...]
>
> Wrapped AVFrames are an abominable hack that should be going away, not
> spread to new places. The correct approach is to move v4l2 rawvideo to
> a lavfi source filter; then none of these hacks are needed.

But that would break SDR seeking.
And also break a bunch of avdevice, and make Nicolas sad.

Nicolas George Oct. 2, 2023, 4:54 p.m. UTC | #5
Paul B Mahol (12023-10-02):
> But that would break SDR seeking.
> And also break a bunch of avdevice, and make Nicolas sad.

I would be very happy if devices were turned into lavfi sources and
sinks. So if you're unable to make sarcasm that's actually relevant,
next time just shut up.

Anyway:

(1) It is not happening anytime soon, and blocking an immediate feature
for a distant refactoring is not acceptable in FFmpeg, only in avconv.

(2) The main advertised API for applications is not lavfi+movie, it is
lavf+lavc, so we have >15 years of applications written that way, and
we still need a mechanism to make filters and devices transparently
available to them.

Patch

diff --git a/libavdevice/v4l2-common.c b/libavdevice/v4l2-common.c
index b5b4448a3154..944ffe3d87e1 100644
--- a/libavdevice/v4l2-common.c
+++ b/libavdevice/v4l2-common.c
@@ -19,53 +19,53 @@ 
 #include "v4l2-common.h"
 
 const struct fmt_map ff_fmt_conversion_table[] = {
-    //ff_fmt              codec_id              v4l2_fmt
-    { AV_PIX_FMT_YUV420P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV420  },
-    { AV_PIX_FMT_YUV420P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YVU420  },
-    { AV_PIX_FMT_YUV422P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV422P },
-    { AV_PIX_FMT_YUYV422, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUYV    },
-    { AV_PIX_FMT_UYVY422, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_UYVY    },
-    { AV_PIX_FMT_YUV411P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV411P },
-    { AV_PIX_FMT_YUV410P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YUV410  },
-    { AV_PIX_FMT_YUV410P, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_YVU410  },
-    { AV_PIX_FMT_RGB555LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB555  },
-    { AV_PIX_FMT_RGB555BE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB555X },
-    { AV_PIX_FMT_RGB565LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB565  },
-    { AV_PIX_FMT_RGB565BE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB565X },
-    { AV_PIX_FMT_BGR24,   AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_BGR24   },
-    { AV_PIX_FMT_RGB24,   AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB24   },
+    //ff_fmt              codec_id                     v4l2_fmt
+    { AV_PIX_FMT_YUV420P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV420  },
+    { AV_PIX_FMT_YUV420P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YVU420  },
+    { AV_PIX_FMT_YUV422P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV422P },
+    { AV_PIX_FMT_YUYV422, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUYV    },
+    { AV_PIX_FMT_UYVY422, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_UYVY    },
+    { AV_PIX_FMT_YUV411P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV411P },
+    { AV_PIX_FMT_YUV410P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YUV410  },
+    { AV_PIX_FMT_YUV410P, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_YVU410  },
+    { AV_PIX_FMT_RGB555LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB555  },
+    { AV_PIX_FMT_RGB555BE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB555X },
+    { AV_PIX_FMT_RGB565LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB565  },
+    { AV_PIX_FMT_RGB565BE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB565X },
+    { AV_PIX_FMT_BGR24,   AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_BGR24   },
+    { AV_PIX_FMT_RGB24,   AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB24   },
 #ifdef V4L2_PIX_FMT_XBGR32
-    { AV_PIX_FMT_BGR0,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_XBGR32  },
-    { AV_PIX_FMT_0RGB,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_XRGB32  },
-    { AV_PIX_FMT_BGRA,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_ABGR32  },
-    { AV_PIX_FMT_ARGB,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_ARGB32  },
+    { AV_PIX_FMT_BGR0,    AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_XBGR32  },
+    { AV_PIX_FMT_0RGB,    AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_XRGB32  },
+    { AV_PIX_FMT_BGRA,    AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_ABGR32  },
+    { AV_PIX_FMT_ARGB,    AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_ARGB32  },
 #endif
-    { AV_PIX_FMT_BGR0,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_BGR32   },
-    { AV_PIX_FMT_0RGB,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_RGB32   },
-    { AV_PIX_FMT_GRAY8,   AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_GREY    },
+    { AV_PIX_FMT_BGR0,    AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_BGR32   },
+    { AV_PIX_FMT_0RGB,    AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_RGB32   },
+    { AV_PIX_FMT_GRAY8,   AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_GREY    },
 #ifdef V4L2_PIX_FMT_Y16
-    { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_Y16     },
+    { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_Y16     },
 #endif
 #ifdef V4L2_PIX_FMT_Z16
-    { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_Z16     },
+    { AV_PIX_FMT_GRAY16LE,AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_Z16     },
 #endif
-    { AV_PIX_FMT_NV12,    AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_NV12    },
-    { AV_PIX_FMT_NONE,    AV_CODEC_ID_MJPEG,    V4L2_PIX_FMT_MJPEG   },
-    { AV_PIX_FMT_NONE,    AV_CODEC_ID_MJPEG,    V4L2_PIX_FMT_JPEG    },
+    { AV_PIX_FMT_NV12,    AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_NV12    },
+    { AV_PIX_FMT_NONE,    AV_CODEC_ID_MJPEG,           V4L2_PIX_FMT_MJPEG   },
+    { AV_PIX_FMT_NONE,    AV_CODEC_ID_MJPEG,           V4L2_PIX_FMT_JPEG    },
 #ifdef V4L2_PIX_FMT_H264
-    { AV_PIX_FMT_NONE,    AV_CODEC_ID_H264,     V4L2_PIX_FMT_H264    },
+    { AV_PIX_FMT_NONE,    AV_CODEC_ID_H264,            V4L2_PIX_FMT_H264    },
 #endif
 #ifdef V4L2_PIX_FMT_MPEG4
-    { AV_PIX_FMT_NONE,    AV_CODEC_ID_MPEG4,    V4L2_PIX_FMT_MPEG4   },
+    { AV_PIX_FMT_NONE,    AV_CODEC_ID_MPEG4,           V4L2_PIX_FMT_MPEG4   },
 #endif
 #ifdef V4L2_PIX_FMT_CPIA1
-    { AV_PIX_FMT_NONE,    AV_CODEC_ID_CPIA,     V4L2_PIX_FMT_CPIA1   },
+    { AV_PIX_FMT_NONE,    AV_CODEC_ID_CPIA,            V4L2_PIX_FMT_CPIA1   },
 #endif
 #ifdef V4L2_PIX_FMT_SRGGB8
-    { AV_PIX_FMT_BAYER_BGGR8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SBGGR8 },
-    { AV_PIX_FMT_BAYER_GBRG8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SGBRG8 },
-    { AV_PIX_FMT_BAYER_GRBG8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SGRBG8 },
-    { AV_PIX_FMT_BAYER_RGGB8, AV_CODEC_ID_RAWVIDEO, V4L2_PIX_FMT_SRGGB8 },
+    { AV_PIX_FMT_BAYER_BGGR8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SBGGR8 },
+    { AV_PIX_FMT_BAYER_GBRG8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SGBRG8 },
+    { AV_PIX_FMT_BAYER_GRBG8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SGRBG8 },
+    { AV_PIX_FMT_BAYER_RGGB8, AV_CODEC_ID_WRAPPED_AVFRAME, V4L2_PIX_FMT_SRGGB8 },
 #endif
     { AV_PIX_FMT_NONE,    AV_CODEC_ID_NONE,     0                    },
 };
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 5e85d1a2b34e..534aa79b639e 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -83,7 +83,10 @@  struct video_data {
     AVClass *class;
     int fd;
     int pixelformat; /* V4L2_PIX_FMT_* */
+    int pix_fmt;     /* AV_PIX_FMT_* */
     int width, height;
+    int bytesperline;
+    int linesize[AV_NUM_DATA_POINTERS];
     int frame_size;
     int interlaced;
     int top_field_first;
@@ -240,6 +243,7 @@  static int device_init(AVFormatContext *ctx, int *width, int *height,
         s->interlaced = 1;
     }
 
+    s->bytesperline = fmt.fmt.pix.bytesperline;
     return res;
 }
 
@@ -501,9 +505,18 @@  static int convert_timestamp(AVFormatContext *ctx, int64_t *ts)
     return 0;
 }
 
+static void v4l2_free_frame(void *opaque, uint8_t *data)
+{
+    AVFrame *frame = (AVFrame*)data;
+
+    av_frame_free(&frame);
+}
+
 static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
 {
     struct video_data *s = ctx->priv_data;
+    struct AVBufferRef *avbuf = NULL;
+    struct AVFrame *frame = NULL;
     struct v4l2_buffer buf = {
         .type   = V4L2_BUF_TYPE_VIDEO_CAPTURE,
         .memory = V4L2_MEMORY_MMAP
@@ -560,13 +573,13 @@  static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
     /* Image is at s->buff_start[buf.index] */
     if (atomic_load(&s->buffers_queued) == FFMAX(s->buffers / 8, 1)) {
         /* when we start getting low on queued buffers, fall back on copying data */
-        res = av_new_packet(pkt, buf.bytesused);
-        if (res < 0) {
-            av_log(ctx, AV_LOG_ERROR, "Error allocating a packet.\n");
+        avbuf = av_buffer_alloc(buf.bytesused);
+        if (!avbuf) {
+            av_log(ctx, AV_LOG_ERROR, "Error allocating a buffer.\n");
             enqueue_buffer(s, &buf);
             return res;
         }
-        memcpy(pkt->data, s->buf_start[buf.index], buf.bytesused);
+        memcpy(avbuf->data, s->buf_start[buf.index], buf.bytesused);
 
         res = enqueue_buffer(s, &buf);
         if (res) {
@@ -576,9 +589,6 @@  static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
     } else {
         struct buff_data *buf_descriptor;
 
-        pkt->data     = s->buf_start[buf.index];
-        pkt->size     = buf.bytesused;
-
         buf_descriptor = av_malloc(sizeof(struct buff_data));
         if (!buf_descriptor) {
             /* Something went wrong... Since av_malloc() failed, we cannot even
@@ -592,19 +602,65 @@  static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
         buf_descriptor->index = buf.index;
         buf_descriptor->s     = s;
 
-        pkt->buf = av_buffer_create(pkt->data, pkt->size, mmap_release_buffer,
-                                    buf_descriptor, 0);
-        if (!pkt->buf) {
+        avbuf = av_buffer_create(s->buf_start[buf.index], buf.bytesused,
+                                 mmap_release_buffer, buf_descriptor, 0);
+        if (!avbuf) {
             av_log(ctx, AV_LOG_ERROR, "Failed to create a buffer\n");
             enqueue_buffer(s, &buf);
             av_freep(&buf_descriptor);
             return AVERROR(ENOMEM);
         }
     }
+
+    if (ctx->video_codec_id == AV_CODEC_ID_WRAPPED_AVFRAME) {
+        frame = av_frame_alloc();
+
+        if (!frame) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create an AVFrame\n");
+            goto err_free;
+        }
+
+        frame->buf[0] = avbuf;
+
+        memcpy(frame->linesize, s->linesize, sizeof(s->linesize));
+        res = av_image_fill_pointers(frame->data, s->pix_fmt, s->height,
+                                     avbuf->data, frame->linesize);
+        if (res < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to compute data pointers\n");
+            goto err_free;
+        }
+
+        frame->format  = s->pix_fmt;
+        frame->width   = s->width;
+        frame->height  = s->height;
+
+        pkt->buf = av_buffer_create((uint8_t*)frame, sizeof(*frame),
+                                    &v4l2_free_frame, ctx, 0);
+        if (!pkt->buf) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create an AVBuffer\n");
+            goto err_free;
+        }
+
+        pkt->data   = (uint8_t*)frame;
+        pkt->size   = sizeof(*frame);
+        pkt->flags |= AV_PKT_FLAG_TRUSTED;
+        frame = NULL;
+    } else {
+        pkt->buf      = avbuf;
+        pkt->data     = avbuf->data;
+        pkt->size     = buf.bytesused;
+        avbuf = NULL;
+    }
+
     pkt->pts = buf_ts.tv_sec * INT64_C(1000000) + buf_ts.tv_usec;
     convert_timestamp(ctx, &pkt->pts);
 
     return pkt->size;
+
+err_free:
+    av_buffer_unref(&avbuf);
+    av_frame_unref(frame);
+    return AVERROR(ENOMEM);
 }
 
 static int mmap_start(AVFormatContext *ctx)
@@ -957,9 +1013,56 @@  static int v4l2_read_header(AVFormatContext *ctx)
         goto fail;
 
     st->codecpar->format = ff_fmt_v4l2ff(desired_format, codec_id);
-    if (st->codecpar->format != AV_PIX_FMT_NONE)
-        s->frame_size = av_image_get_buffer_size(st->codecpar->format,
-                                                 s->width, s->height, 1);
+    s->pix_fmt = st->codecpar->format;
+
+    if (st->codecpar->format != AV_PIX_FMT_NONE) {
+        size_t sizes[4];
+        ptrdiff_t linesize[4];
+        const AVPixFmtDescriptor *desc;
+        int max_step     [4];
+        int max_step_comp[4];
+
+        /*
+         * Per V4L2 spec, for the single plane API the line strides are always
+         * just divided down by the subsampling factor for chroma planes.
+         *
+         * For example, for NV12, plane 1 contains UV components at half
+         * resolution, and therefore has an equal line stride to plane 0.
+         */
+        desc = av_pix_fmt_desc_get(st->codecpar->format);
+        av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
+
+        if (!s->bytesperline) {
+            av_log(ctx, AV_LOG_WARNING,
+                   "The V4L2 driver did not set bytesperline, guessing.\n");
+            s->bytesperline = s->width * max_step[0];
+        }
+
+        s->linesize[0] = s->bytesperline;
+        for (int i = 1; i < 4; i++) {
+            int sh = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
+            s->linesize[i] = (s->linesize[0] * max_step[i] / max_step[0]) >> sh;
+        }
+
+        /* Convert since the types are mismatched... */
+        for (int i = 0; i < 4; i++)
+            linesize[i] = s->linesize[i];
+
+        res = av_image_fill_plane_sizes(sizes, s->pix_fmt, s->height, linesize);
+        if (res < 0) {
+            av_log(ctx, AV_LOG_ERROR, "failed to fill plane sizes\n");
+            goto fail;
+        }
+
+        s->frame_size = 0;
+        for (int i = 0; i < 4; i++) {
+            if (sizes[i] > INT_MAX - s->frame_size) {
+                res = -EINVAL;
+                goto fail;
+            }
+            s->frame_size += sizes[i];
+        }
+    }
 
     if ((res = mmap_init(ctx)) ||
         (res = mmap_start(ctx)) < 0)
@@ -969,16 +1072,9 @@  static int v4l2_read_header(AVFormatContext *ctx)
 
     st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codecpar->codec_id = codec_id;
-    if (codec_id == AV_CODEC_ID_RAWVIDEO)
-        st->codecpar->codec_tag =
-            avcodec_pix_fmt_to_codec_tag(st->codecpar->format);
-    else if (codec_id == AV_CODEC_ID_H264) {
+    if (codec_id == AV_CODEC_ID_H264) {
         avpriv_stream_set_need_parsing(st, AVSTREAM_PARSE_FULL_ONCE);
     }
-    if (desired_format == V4L2_PIX_FMT_YVU420)
-        st->codecpar->codec_tag = MKTAG('Y', 'V', '1', '2');
-    else if (desired_format == V4L2_PIX_FMT_YVU410)
-        st->codecpar->codec_tag = MKTAG('Y', 'V', 'U', '9');
     st->codecpar->width = s->width;
     st->codecpar->height = s->height;
     if (st->avg_frame_rate.den)