diff mbox series

[FFmpeg-devel] avutil/hwcontext_videotoolbox: implement hwupload to convert AVFrame to CVPixelBuffer

Message ID 20210629220816.97586-1-ffmpeg@tmm1.net
State Accepted
Headers show
Series [FFmpeg-devel] avutil/hwcontext_videotoolbox: implement hwupload to convert AVFrame to CVPixelBuffer | expand

Checks

Context Check Description
andriy/x86_make fail Make failed
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Aman Karmani June 29, 2021, 10:08 p.m. UTC
From: Aman Karmani <aman@tmm1.net>

Teach AV_HWDEVICE_TYPE_VIDEOTOOLBOX to be able to create AVFrames of type
AV_PIX_FMT_VIDEOTOOLBOX. This can be used to hwupload a regular AVFrame
into its CVPixelBuffer equivalent.

    ffmpeg -init_hw_device videotoolbox -f lavfi -i color=black:640x480 -vf hwupload -c:v h264_videotoolbox -f null -y /dev/null

Signed-off-by: Aman Karmani <aman@tmm1.net>
---
 libavutil/hwcontext_videotoolbox.c | 418 ++++++++++++++++++++++++++++-
 libavutil/hwcontext_videotoolbox.h |   7 +-
 2 files changed, 420 insertions(+), 5 deletions(-)

Comments

Aman Karmani July 5, 2021, 6:37 p.m. UTC | #1
On Tue, Jun 29, 2021 at 3:08 PM Aman Karmani <ffmpeg@tmm1.net> wrote:

> From: Aman Karmani <aman@tmm1.net>
>
> Teach AV_HWDEVICE_TYPE_VIDEOTOOLBOX to be able to create AVFrames of type
> AV_PIX_FMT_VIDEOTOOLBOX. This can be used to hwupload a regular AVFrame
> into its CVPixelBuffer equivalent.
>
>     ffmpeg -init_hw_device videotoolbox -f lavfi -i color=black:640x480 -vf hwupload -c:v h264_videotoolbox -f null -y /dev/null


Will merge this in a few days if no one objects.

Do I need to bump the avutil micro (patch) or minor version for this?

Aman


>
> Signed-off-by: Aman Karmani <aman@tmm1.net>
> ---
>  libavutil/hwcontext_videotoolbox.c | 418 ++++++++++++++++++++++++++++-
>  libavutil/hwcontext_videotoolbox.h |   7 +-
>  2 files changed, 420 insertions(+), 5 deletions(-)
>
> diff --git a/libavutil/hwcontext_videotoolbox.c
> b/libavutil/hwcontext_videotoolbox.c
> index bded9873fe..58095a1fc9 100644
> --- a/libavutil/hwcontext_videotoolbox.c
> +++ b/libavutil/hwcontext_videotoolbox.c
> @@ -24,6 +24,7 @@
>  #include <VideoToolbox/VideoToolbox.h>
>
>  #include "buffer.h"
> +#include "buffer_internal.h"
>  #include "common.h"
>  #include "hwcontext.h"
>  #include "hwcontext_internal.h"
> @@ -32,6 +33,10 @@
>  #include "pixfmt.h"
>  #include "pixdesc.h"
>
> +typedef struct VTFramesContext {
> +    CVPixelBufferPoolRef pool;
> +} VTFramesContext;
> +
>  static const struct {
>      uint32_t cv_fmt;
>      bool full_range;
> @@ -50,6 +55,39 @@ static const struct {
>  #endif
>  };
>
> +static const enum AVPixelFormat supported_formats[] = {
> +    AV_PIX_FMT_NV12,
> +    AV_PIX_FMT_YUV420P,
> +    AV_PIX_FMT_UYVY422,
> +    AV_PIX_FMT_P010,
> +    AV_PIX_FMT_BGRA,
> +};
> +
> +static int vt_frames_get_constraints(AVHWDeviceContext *ctx,
> +                                     const void *hwconfig,
> +                                     AVHWFramesConstraints *constraints)
> +{
> +    int i;
> +
> +    constraints->valid_sw_formats =
> av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
> +
> sizeof(*constraints->valid_sw_formats));
> +    if (!constraints->valid_sw_formats)
> +        return AVERROR(ENOMEM);
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
> +        constraints->valid_sw_formats[i] = supported_formats[i];
> +    constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] =
> AV_PIX_FMT_NONE;
> +
> +    constraints->valid_hw_formats = av_malloc_array(2,
> sizeof(*constraints->valid_hw_formats));
> +    if (!constraints->valid_hw_formats)
> +        return AVERROR(ENOMEM);
> +
> +    constraints->valid_hw_formats[0] = AV_PIX_FMT_VIDEOTOOLBOX;
> +    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
> +
> +    return 0;
> +}
> +
>  enum AVPixelFormat av_map_videotoolbox_format_to_pixfmt(uint32_t cv_fmt)
>  {
>      int i;
> @@ -75,11 +113,134 @@ uint32_t
> av_map_videotoolbox_format_from_pixfmt2(enum AVPixelFormat pix_fmt, boo
>      return 0;
>  }
>
> +static int vt_pool_alloc(AVHWFramesContext *ctx)
> +{
> +    VTFramesContext *fctx = ctx->internal->priv;
> +    CVReturn err;
> +    CFNumberRef w, h, pixfmt;
> +    uint32_t cv_pixfmt;
> +    CFMutableDictionaryRef attributes, iosurface_properties;
> +
> +    attributes = CFDictionaryCreateMutable(
> +        NULL,
> +        2,
> +        &kCFTypeDictionaryKeyCallBacks,
> +        &kCFTypeDictionaryValueCallBacks);
> +
> +    cv_pixfmt = av_map_videotoolbox_format_from_pixfmt(ctx->sw_format);
> +    pixfmt = CFNumberCreate(NULL, kCFNumberSInt32Type, &cv_pixfmt);
> +    CFDictionarySetValue(
> +        attributes,
> +        kCVPixelBufferPixelFormatTypeKey,
> +        pixfmt);
> +    CFRelease(pixfmt);
> +
> +    iosurface_properties = CFDictionaryCreateMutable(
> +        NULL,
> +        0,
> +        &kCFTypeDictionaryKeyCallBacks,
> +        &kCFTypeDictionaryValueCallBacks);
> +    CFDictionarySetValue(attributes,
> kCVPixelBufferIOSurfacePropertiesKey, iosurface_properties);
> +    CFRelease(iosurface_properties);
> +
> +    w = CFNumberCreate(NULL, kCFNumberSInt32Type, &ctx->width);
> +    h = CFNumberCreate(NULL, kCFNumberSInt32Type, &ctx->height);
> +    CFDictionarySetValue(attributes, kCVPixelBufferWidthKey, w);
> +    CFDictionarySetValue(attributes, kCVPixelBufferHeightKey, h);
> +    CFRelease(w);
> +    CFRelease(h);
> +
> +    err = CVPixelBufferPoolCreate(
> +        NULL,
> +        NULL,
> +        attributes,
> +        &fctx->pool);
> +    CFRelease(attributes);
> +
> +    if (err == kCVReturnSuccess)
> +        return 0;
> +
> +    av_log(ctx, AV_LOG_ERROR, "Error creating CVPixelBufferPool: %d\n",
> err);
> +    return AVERROR_EXTERNAL;
> +}
> +
> +static AVBufferRef *vt_dummy_pool_alloc(void *opaque, size_t size)
> +{
> +    return NULL;
> +}
> +
> +static void vt_frames_uninit(AVHWFramesContext *ctx)
> +{
> +    VTFramesContext *fctx = ctx->internal->priv;
> +    if (fctx->pool) {
> +        CVPixelBufferPoolRelease(fctx->pool);
> +        fctx->pool = NULL;
> +    }
> +}
> +
> +static int vt_frames_init(AVHWFramesContext *ctx)
> +{
> +    int i, ret;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
> +        if (ctx->sw_format == supported_formats[i])
> +            break;
> +    }
> +    if (i == FF_ARRAY_ELEMS(supported_formats)) {
> +        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
> +               av_get_pix_fmt_name(ctx->sw_format));
> +        return AVERROR(ENOSYS);
> +    }
> +
> +    // create a dummy pool so av_hwframe_get_buffer doesn't EINVAL
> +    if (!ctx->pool) {
> +        ctx->internal->pool_internal = av_buffer_pool_init2(0, ctx,
> vt_dummy_pool_alloc, NULL);
> +        if (!ctx->internal->pool_internal)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    ret = vt_pool_alloc(ctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
> +static void videotoolbox_buffer_release(void *opaque, uint8_t *data)
> +{
> +    CVPixelBufferRelease((CVPixelBufferRef)data);
> +}
> +
>  static int vt_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>  {
> -    frame->buf[0] = av_buffer_pool_get(ctx->pool);
> -    if (!frame->buf[0])
> -        return AVERROR(ENOMEM);
> +    VTFramesContext *fctx = ctx->internal->priv;
> +
> +    if (ctx->pool && ctx->pool->size != 0) {
> +        frame->buf[0] = av_buffer_pool_get(ctx->pool);
> +        if (!frame->buf[0])
> +            return AVERROR(ENOMEM);
> +    } else {
> +        CVPixelBufferRef pixbuf;
> +        AVBufferRef *buf = NULL;
> +        CVReturn err;
> +
> +        err = CVPixelBufferPoolCreatePixelBuffer(
> +            NULL,
> +            fctx->pool,
> +            &pixbuf
> +        );
> +        if (err != kCVReturnSuccess) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to create pixel buffer from
> pool: %d\n", err);
> +            return AVERROR_EXTERNAL;
> +        }
> +
> +        buf = av_buffer_create((uint8_t *)pixbuf, 1,
> videotoolbox_buffer_release, NULL, 0);
> +        if (!buf) {
> +            CVPixelBufferRelease(pixbuf);
> +            return AVERROR(ENOMEM);
> +        }
> +        frame->buf[0] = buf;
> +    }
>
>      frame->data[3] = frame->buf[0]->data;
>      frame->format  = AV_PIX_FMT_VIDEOTOOLBOX;
> @@ -111,6 +272,248 @@ static void vt_unmap(AVHWFramesContext *ctx,
> HWMapDescriptor *hwmap)
>      CVPixelBufferUnlockBaseAddress(pixbuf, (uintptr_t)hwmap->priv);
>  }
>
> +static int vt_pixbuf_set_par(AVHWFramesContext *hwfc,
> +                             CVPixelBufferRef pixbuf, const AVFrame *src)
> +{
> +    CFMutableDictionaryRef par = NULL;
> +    CFNumberRef num = NULL, den = NULL;
> +    AVRational avpar = src->sample_aspect_ratio;
> +
> +    if (avpar.num == 0)
> +        return 0;
> +
> +    av_reduce(&avpar.num, &avpar.den,
> +                avpar.num, avpar.den,
> +                0xFFFFFFFF);
> +
> +    num = CFNumberCreate(kCFAllocatorDefault,
> +                            kCFNumberIntType,
> +                            &avpar.num);
> +
> +    den = CFNumberCreate(kCFAllocatorDefault,
> +                            kCFNumberIntType,
> +                            &avpar.den);
> +
> +    par = CFDictionaryCreateMutable(kCFAllocatorDefault,
> +                                    2,
> +                                    &kCFCopyStringDictionaryKeyCallBacks,
> +                                    &kCFTypeDictionaryValueCallBacks);
> +
> +    if (!par || !num || !den) {
> +        if (par) CFRelease(par);
> +        if (num) CFRelease(num);
> +        if (den) CFRelease(den);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    CFDictionarySetValue(
> +        par,
> +        kCVImageBufferPixelAspectRatioHorizontalSpacingKey,
> +        num);
> +    CFDictionarySetValue(
> +        par,
> +        kCVImageBufferPixelAspectRatioVerticalSpacingKey,
> +        den);
> +
> +    CVBufferSetAttachment(
> +        pixbuf,
> +        kCVImageBufferPixelAspectRatioKey,
> +        par,
> +        kCVAttachmentMode_ShouldPropagate
> +    );
> +
> +    CFRelease(par);
> +    CFRelease(num);
> +    CFRelease(den);
> +
> +    return 0;
> +}
> +
> +static int vt_pixbuf_set_chromaloc(AVHWFramesContext *hwfc,
> +                                   CVPixelBufferRef pixbuf, const AVFrame
> *src)
> +{
> +    CFStringRef loc = NULL;
> +
> +    switch (src->chroma_location) {
> +    case AVCHROMA_LOC_LEFT:
> +        loc = kCVImageBufferChromaLocation_Left;
> +        break;
> +    case AVCHROMA_LOC_CENTER:
> +        loc = kCVImageBufferChromaLocation_Center;
> +        break;
> +    case AVCHROMA_LOC_TOP:
> +        loc = kCVImageBufferChromaLocation_Top;
> +        break;
> +    case AVCHROMA_LOC_BOTTOM:
> +        loc = kCVImageBufferChromaLocation_Bottom;
> +        break;
> +    case AVCHROMA_LOC_TOPLEFT:
> +        loc = kCVImageBufferChromaLocation_TopLeft;
> +        break;
> +    case AVCHROMA_LOC_BOTTOMLEFT:
> +        loc = kCVImageBufferChromaLocation_BottomLeft;
> +        break;
> +    }
> +
> +    if (loc) {
> +        CVBufferSetAttachment(
> +            pixbuf,
> +            kCVImageBufferChromaLocationTopFieldKey,
> +            loc,
> +            kCVAttachmentMode_ShouldPropagate);
> +    }
> +
> +    return 0;
> +}
> +
> +static int vt_pixbuf_set_colorspace(AVHWFramesContext *hwfc,
> +                                    CVPixelBufferRef pixbuf, const
> AVFrame *src)
> +{
> +    CFStringRef colormatrix = NULL, colorpri = NULL, colortrc = NULL;
> +    Float32 gamma = 0;
> +
> +    switch (src->colorspace) {
> +    case AVCOL_SPC_BT2020_CL:
> +    case AVCOL_SPC_BT2020_NCL:
> +        if (__builtin_available(macOS 10.11, *))
> +            colormatrix = kCVImageBufferYCbCrMatrix_ITU_R_2020;
> +        else
> +            colormatrix = CFSTR("ITU_R_2020");
> +        break;
> +    case AVCOL_SPC_BT470BG:
> +    case AVCOL_SPC_SMPTE170M:
> +        colormatrix = kCVImageBufferYCbCrMatrix_ITU_R_601_4;
> +        break;
> +    case AVCOL_SPC_BT709:
> +        colormatrix = kCVImageBufferYCbCrMatrix_ITU_R_709_2;
> +        break;
> +    case AVCOL_SPC_SMPTE240M:
> +        colormatrix = kCVImageBufferYCbCrMatrix_SMPTE_240M_1995;
> +        break;
> +    case AVCOL_SPC_UNSPECIFIED:
> +        break;
> +    default:
> +        av_log(hwfc, AV_LOG_WARNING, "Color space %s is not
> supported.\n", av_color_space_name(src->colorspace));
> +    }
> +
> +    switch (src->color_primaries) {
> +    case AVCOL_PRI_BT2020:
> +        if (__builtin_available(macOS 10.11, *))
> +            colorpri = kCVImageBufferColorPrimaries_ITU_R_2020;
> +        else
> +            colorpri = CFSTR("ITU_R_2020");
> +        break;
> +    case AVCOL_PRI_BT709:
> +        colorpri = kCVImageBufferColorPrimaries_ITU_R_709_2;
> +        break;
> +    case AVCOL_PRI_SMPTE170M:
> +        colorpri = kCVImageBufferColorPrimaries_SMPTE_C;
> +        break;
> +    case AVCOL_PRI_BT470BG:
> +        colorpri = kCVImageBufferColorPrimaries_EBU_3213;
> +        break;
> +    case AVCOL_PRI_UNSPECIFIED:
> +        break;
> +    default:
> +        av_log(hwfc, AV_LOG_WARNING, "Color primaries %s is not
> supported.\n", av_color_primaries_name(src->color_primaries));
> +    }
> +
> +    switch (src->color_trc) {
> +    case AVCOL_TRC_SMPTE2084:
> +        if (__builtin_available(macOS 10.13, *))
> +            colortrc = kCVImageBufferTransferFunction_SMPTE_ST_2084_PQ;
> +        else
> +            colortrc = CFSTR("SMPTE_ST_2084_PQ");
> +        break;
> +    case AVCOL_TRC_BT2020_10:
> +    case AVCOL_TRC_BT2020_12:
> +        if (__builtin_available(macOS 10.11, *))
> +            colortrc = kCVImageBufferTransferFunction_ITU_R_2020;
> +        else
> +            colortrc = CFSTR("ITU_R_2020");
> +        break;
> +    case AVCOL_TRC_BT709:
> +        colortrc = kCVImageBufferTransferFunction_ITU_R_709_2;
> +        break;
> +    case AVCOL_TRC_SMPTE240M:
> +        colortrc = kCVImageBufferTransferFunction_SMPTE_240M_1995;
> +        break;
> +    case AVCOL_TRC_SMPTE428:
> +        if (__builtin_available(macOS 10.12, *))
> +            colortrc = kCVImageBufferTransferFunction_SMPTE_ST_428_1;
> +        else
> +            colortrc = CFSTR("SMPTE_ST_428_1");
> +        break;
> +    case AVCOL_TRC_ARIB_STD_B67:
> +        if (__builtin_available(macOS 10.13, *))
> +            colortrc = kCVImageBufferTransferFunction_ITU_R_2100_HLG;
> +        else
> +            colortrc = CFSTR("ITU_R_2100_HLG");
> +        break;
> +    case AVCOL_TRC_GAMMA22:
> +        gamma = 2.2;
> +        colortrc = kCVImageBufferTransferFunction_UseGamma;
> +        break;
> +    case AVCOL_TRC_GAMMA28:
> +        gamma = 2.8;
> +        colortrc = kCVImageBufferTransferFunction_UseGamma;
> +        break;
> +    case AVCOL_TRC_UNSPECIFIED:
> +        break;
> +    default:
> +        av_log(hwfc, AV_LOG_WARNING, "Color transfer function %s is not
> supported.\n", av_color_transfer_name(src->color_trc));
> +    }
> +
> +    if (colormatrix) {
> +        CVBufferSetAttachment(
> +            pixbuf,
> +            kCVImageBufferYCbCrMatrixKey,
> +            colormatrix,
> +            kCVAttachmentMode_ShouldPropagate);
> +    }
> +    if (colorpri) {
> +        CVBufferSetAttachment(
> +            pixbuf,
> +            kCVImageBufferColorPrimariesKey,
> +            colorpri,
> +            kCVAttachmentMode_ShouldPropagate);
> +    }
> +    if (colortrc) {
> +        CVBufferSetAttachment(
> +            pixbuf,
> +            kCVImageBufferTransferFunctionKey,
> +            colortrc,
> +            kCVAttachmentMode_ShouldPropagate);
> +    }
> +    if (gamma != 0) {
> +        CFNumberRef gamma_level = CFNumberCreate(NULL,
> kCFNumberFloat32Type, &gamma);
> +        CVBufferSetAttachment(
> +            pixbuf,
> +            kCVImageBufferGammaLevelKey,
> +            gamma_level,
> +            kCVAttachmentMode_ShouldPropagate);
> +        CFRelease(gamma_level);
> +    }
> +
> +    return 0;
> +}
> +
> +static int vt_pixbuf_set_attachments(AVHWFramesContext *hwfc,
> +                                     CVPixelBufferRef pixbuf, const
> AVFrame *src)
> +{
> +    int ret;
> +    ret = vt_pixbuf_set_par(hwfc, pixbuf, src);
> +    if (ret < 0)
> +        return ret;
> +    ret = vt_pixbuf_set_colorspace(hwfc, pixbuf, src);
> +    if (ret < 0)
> +        return ret;
> +    ret = vt_pixbuf_set_chromaloc(hwfc, pixbuf, src);
> +    if (ret < 0)
> +        return ret;
> +    return 0;
> +}
> +
>  static int vt_map_frame(AVHWFramesContext *ctx, AVFrame *dst, const
> AVFrame *src,
>                          int flags)
>  {
> @@ -223,6 +626,10 @@ static int vt_transfer_data_to(AVHWFramesContext
> *hwfc,
>      if (err)
>          goto fail;
>
> +    err = vt_pixbuf_set_attachments(hwfc, (CVPixelBufferRef)dst->data[3],
> src);
> +    if (err)
> +        goto fail;
> +
>      err = 0;
>  fail:
>      av_frame_free(&map);
> @@ -244,8 +651,13 @@ const HWContextType ff_hwcontext_type_videotoolbox = {
>      .type                 = AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
>      .name                 = "videotoolbox",
>
> +    .frames_priv_size     = sizeof(VTFramesContext),
> +
>      .device_create        = vt_device_create,
> +    .frames_init          = vt_frames_init,
>      .frames_get_buffer    = vt_get_buffer,
> +    .frames_get_constraints = vt_frames_get_constraints,
> +    .frames_uninit        = vt_frames_uninit,
>      .transfer_get_formats = vt_transfer_get_formats,
>      .transfer_data_to     = vt_transfer_data_to,
>      .transfer_data_from   = vt_transfer_data_from,
> diff --git a/libavutil/hwcontext_videotoolbox.h
> b/libavutil/hwcontext_videotoolbox.h
> index 5074d79e68..62cde07c51 100644
> --- a/libavutil/hwcontext_videotoolbox.h
> +++ b/libavutil/hwcontext_videotoolbox.h
> @@ -29,11 +29,14 @@
>   * @file
>   * An API-specific header for AV_HWDEVICE_TYPE_VIDEOTOOLBOX.
>   *
> - * This API currently does not support frame allocation, as the raw
> VideoToolbox
> - * API does allocation, and FFmpeg itself never has the need to allocate
> frames.
> + * This API supports frame allocation using a native CVPixelBufferPool
> + * instead of an AVBufferPool.
>   *
>   * If the API user sets a custom pool, AVHWFramesContext.pool must return
>   * AVBufferRefs whose data pointer is a CVImageBufferRef or
> CVPixelBufferRef.
> + * Note that the underlying CVPixelBuffer could be retained by OS
> frameworks
> + * depending on application usage, so it is preferable to let CoreVideo
> manage
> + * the pool using the default implementation.
>   *
>   * Currently AVHWDeviceContext.hwctx and AVHWFramesContext.hwctx are
> always
>   * NULL.
> --
> 2.29.2
>
>
diff mbox series

Patch

diff --git a/libavutil/hwcontext_videotoolbox.c b/libavutil/hwcontext_videotoolbox.c
index bded9873fe..58095a1fc9 100644
--- a/libavutil/hwcontext_videotoolbox.c
+++ b/libavutil/hwcontext_videotoolbox.c
@@ -24,6 +24,7 @@ 
 #include <VideoToolbox/VideoToolbox.h>
 
 #include "buffer.h"
+#include "buffer_internal.h"
 #include "common.h"
 #include "hwcontext.h"
 #include "hwcontext_internal.h"
@@ -32,6 +33,10 @@ 
 #include "pixfmt.h"
 #include "pixdesc.h"
 
+typedef struct VTFramesContext {
+    CVPixelBufferPoolRef pool;
+} VTFramesContext;
+
 static const struct {
     uint32_t cv_fmt;
     bool full_range;
@@ -50,6 +55,39 @@  static const struct {
 #endif
 };
 
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_UYVY422,
+    AV_PIX_FMT_P010,
+    AV_PIX_FMT_BGRA,
+};
+
+static int vt_frames_get_constraints(AVHWDeviceContext *ctx,
+                                     const void *hwconfig,
+                                     AVHWFramesConstraints *constraints)
+{
+    int i;
+
+    constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
+                                                    sizeof(*constraints->valid_sw_formats));
+    if (!constraints->valid_sw_formats)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+        constraints->valid_sw_formats[i] = supported_formats[i];
+    constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
+
+    constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
+    if (!constraints->valid_hw_formats)
+        return AVERROR(ENOMEM);
+
+    constraints->valid_hw_formats[0] = AV_PIX_FMT_VIDEOTOOLBOX;
+    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
 enum AVPixelFormat av_map_videotoolbox_format_to_pixfmt(uint32_t cv_fmt)
 {
     int i;
@@ -75,11 +113,134 @@  uint32_t av_map_videotoolbox_format_from_pixfmt2(enum AVPixelFormat pix_fmt, boo
     return 0;
 }
 
+static int vt_pool_alloc(AVHWFramesContext *ctx)
+{
+    VTFramesContext *fctx = ctx->internal->priv;
+    CVReturn err;
+    CFNumberRef w, h, pixfmt;
+    uint32_t cv_pixfmt;
+    CFMutableDictionaryRef attributes, iosurface_properties;
+
+    attributes = CFDictionaryCreateMutable(
+        NULL,
+        2,
+        &kCFTypeDictionaryKeyCallBacks,
+        &kCFTypeDictionaryValueCallBacks);
+
+    cv_pixfmt = av_map_videotoolbox_format_from_pixfmt(ctx->sw_format);
+    pixfmt = CFNumberCreate(NULL, kCFNumberSInt32Type, &cv_pixfmt);
+    CFDictionarySetValue(
+        attributes,
+        kCVPixelBufferPixelFormatTypeKey,
+        pixfmt);
+    CFRelease(pixfmt);
+
+    iosurface_properties = CFDictionaryCreateMutable(
+        NULL,
+        0,
+        &kCFTypeDictionaryKeyCallBacks,
+        &kCFTypeDictionaryValueCallBacks);
+    CFDictionarySetValue(attributes, kCVPixelBufferIOSurfacePropertiesKey, iosurface_properties);
+    CFRelease(iosurface_properties);
+
+    w = CFNumberCreate(NULL, kCFNumberSInt32Type, &ctx->width);
+    h = CFNumberCreate(NULL, kCFNumberSInt32Type, &ctx->height);
+    CFDictionarySetValue(attributes, kCVPixelBufferWidthKey, w);
+    CFDictionarySetValue(attributes, kCVPixelBufferHeightKey, h);
+    CFRelease(w);
+    CFRelease(h);
+
+    err = CVPixelBufferPoolCreate(
+        NULL,
+        NULL,
+        attributes,
+        &fctx->pool);
+    CFRelease(attributes);
+
+    if (err == kCVReturnSuccess)
+        return 0;
+
+    av_log(ctx, AV_LOG_ERROR, "Error creating CVPixelBufferPool: %d\n", err);
+    return AVERROR_EXTERNAL;
+}
+
+static AVBufferRef *vt_dummy_pool_alloc(void *opaque, size_t size)
+{
+    return NULL;
+}
+
+static void vt_frames_uninit(AVHWFramesContext *ctx)
+{
+    VTFramesContext *fctx = ctx->internal->priv;
+    if (fctx->pool) {
+        CVPixelBufferPoolRelease(fctx->pool);
+        fctx->pool = NULL;
+    }
+}
+
+static int vt_frames_init(AVHWFramesContext *ctx)
+{
+    int i, ret;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
+        if (ctx->sw_format == supported_formats[i])
+            break;
+    }
+    if (i == FF_ARRAY_ELEMS(supported_formats)) {
+        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
+               av_get_pix_fmt_name(ctx->sw_format));
+        return AVERROR(ENOSYS);
+    }
+
+    // create a dummy pool so av_hwframe_get_buffer doesn't EINVAL
+    if (!ctx->pool) {
+        ctx->internal->pool_internal = av_buffer_pool_init2(0, ctx, vt_dummy_pool_alloc, NULL);
+        if (!ctx->internal->pool_internal)
+            return AVERROR(ENOMEM);
+    }
+
+    ret = vt_pool_alloc(ctx);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static void videotoolbox_buffer_release(void *opaque, uint8_t *data)
+{
+    CVPixelBufferRelease((CVPixelBufferRef)data);
+}
+
 static int vt_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
 {
-    frame->buf[0] = av_buffer_pool_get(ctx->pool);
-    if (!frame->buf[0])
-        return AVERROR(ENOMEM);
+    VTFramesContext *fctx = ctx->internal->priv;
+
+    if (ctx->pool && ctx->pool->size != 0) {
+        frame->buf[0] = av_buffer_pool_get(ctx->pool);
+        if (!frame->buf[0])
+            return AVERROR(ENOMEM);
+    } else {
+        CVPixelBufferRef pixbuf;
+        AVBufferRef *buf = NULL;
+        CVReturn err;
+
+        err = CVPixelBufferPoolCreatePixelBuffer(
+            NULL,
+            fctx->pool,
+            &pixbuf
+        );
+        if (err != kCVReturnSuccess) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create pixel buffer from pool: %d\n", err);
+            return AVERROR_EXTERNAL;
+        }
+
+        buf = av_buffer_create((uint8_t *)pixbuf, 1, videotoolbox_buffer_release, NULL, 0);
+        if (!buf) {
+            CVPixelBufferRelease(pixbuf);
+            return AVERROR(ENOMEM);
+        }
+        frame->buf[0] = buf;
+    }
 
     frame->data[3] = frame->buf[0]->data;
     frame->format  = AV_PIX_FMT_VIDEOTOOLBOX;
@@ -111,6 +272,248 @@  static void vt_unmap(AVHWFramesContext *ctx, HWMapDescriptor *hwmap)
     CVPixelBufferUnlockBaseAddress(pixbuf, (uintptr_t)hwmap->priv);
 }
 
+static int vt_pixbuf_set_par(AVHWFramesContext *hwfc,
+                             CVPixelBufferRef pixbuf, const AVFrame *src)
+{
+    CFMutableDictionaryRef par = NULL;
+    CFNumberRef num = NULL, den = NULL;
+    AVRational avpar = src->sample_aspect_ratio;
+
+    if (avpar.num == 0)
+        return 0;
+
+    av_reduce(&avpar.num, &avpar.den,
+                avpar.num, avpar.den,
+                0xFFFFFFFF);
+
+    num = CFNumberCreate(kCFAllocatorDefault,
+                            kCFNumberIntType,
+                            &avpar.num);
+
+    den = CFNumberCreate(kCFAllocatorDefault,
+                            kCFNumberIntType,
+                            &avpar.den);
+
+    par = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                    2,
+                                    &kCFCopyStringDictionaryKeyCallBacks,
+                                    &kCFTypeDictionaryValueCallBacks);
+
+    if (!par || !num || !den) {
+        if (par) CFRelease(par);
+        if (num) CFRelease(num);
+        if (den) CFRelease(den);
+        return AVERROR(ENOMEM);
+    }
+
+    CFDictionarySetValue(
+        par,
+        kCVImageBufferPixelAspectRatioHorizontalSpacingKey,
+        num);
+    CFDictionarySetValue(
+        par,
+        kCVImageBufferPixelAspectRatioVerticalSpacingKey,
+        den);
+
+    CVBufferSetAttachment(
+        pixbuf,
+        kCVImageBufferPixelAspectRatioKey,
+        par,
+        kCVAttachmentMode_ShouldPropagate
+    );
+
+    CFRelease(par);
+    CFRelease(num);
+    CFRelease(den);
+
+    return 0;
+}
+
+static int vt_pixbuf_set_chromaloc(AVHWFramesContext *hwfc,
+                                   CVPixelBufferRef pixbuf, const AVFrame *src)
+{
+    CFStringRef loc = NULL;
+
+    switch (src->chroma_location) {
+    case AVCHROMA_LOC_LEFT:
+        loc = kCVImageBufferChromaLocation_Left;
+        break;
+    case AVCHROMA_LOC_CENTER:
+        loc = kCVImageBufferChromaLocation_Center;
+        break;
+    case AVCHROMA_LOC_TOP:
+        loc = kCVImageBufferChromaLocation_Top;
+        break;
+    case AVCHROMA_LOC_BOTTOM:
+        loc = kCVImageBufferChromaLocation_Bottom;
+        break;
+    case AVCHROMA_LOC_TOPLEFT:
+        loc = kCVImageBufferChromaLocation_TopLeft;
+        break;
+    case AVCHROMA_LOC_BOTTOMLEFT:
+        loc = kCVImageBufferChromaLocation_BottomLeft;
+        break;
+    }
+
+    if (loc) {
+        CVBufferSetAttachment(
+            pixbuf,
+            kCVImageBufferChromaLocationTopFieldKey,
+            loc,
+            kCVAttachmentMode_ShouldPropagate);
+    }
+
+    return 0;
+}
+
+static int vt_pixbuf_set_colorspace(AVHWFramesContext *hwfc,
+                                    CVPixelBufferRef pixbuf, const AVFrame *src)
+{
+    CFStringRef colormatrix = NULL, colorpri = NULL, colortrc = NULL;
+    Float32 gamma = 0;
+
+    switch (src->colorspace) {
+    case AVCOL_SPC_BT2020_CL:
+    case AVCOL_SPC_BT2020_NCL:
+        if (__builtin_available(macOS 10.11, *))
+            colormatrix = kCVImageBufferYCbCrMatrix_ITU_R_2020;
+        else
+            colormatrix = CFSTR("ITU_R_2020");
+        break;
+    case AVCOL_SPC_BT470BG:
+    case AVCOL_SPC_SMPTE170M:
+        colormatrix = kCVImageBufferYCbCrMatrix_ITU_R_601_4;
+        break;
+    case AVCOL_SPC_BT709:
+        colormatrix = kCVImageBufferYCbCrMatrix_ITU_R_709_2;
+        break;
+    case AVCOL_SPC_SMPTE240M:
+        colormatrix = kCVImageBufferYCbCrMatrix_SMPTE_240M_1995;
+        break;
+    case AVCOL_SPC_UNSPECIFIED:
+        break;
+    default:
+        av_log(hwfc, AV_LOG_WARNING, "Color space %s is not supported.\n", av_color_space_name(src->colorspace));
+    }
+
+    switch (src->color_primaries) {
+    case AVCOL_PRI_BT2020:
+        if (__builtin_available(macOS 10.11, *))
+            colorpri = kCVImageBufferColorPrimaries_ITU_R_2020;
+        else
+            colorpri = CFSTR("ITU_R_2020");
+        break;
+    case AVCOL_PRI_BT709:
+        colorpri = kCVImageBufferColorPrimaries_ITU_R_709_2;
+        break;
+    case AVCOL_PRI_SMPTE170M:
+        colorpri = kCVImageBufferColorPrimaries_SMPTE_C;
+        break;
+    case AVCOL_PRI_BT470BG:
+        colorpri = kCVImageBufferColorPrimaries_EBU_3213;
+        break;
+    case AVCOL_PRI_UNSPECIFIED:
+        break;
+    default:
+        av_log(hwfc, AV_LOG_WARNING, "Color primaries %s is not supported.\n", av_color_primaries_name(src->color_primaries));
+    }
+
+    switch (src->color_trc) {
+    case AVCOL_TRC_SMPTE2084:
+        if (__builtin_available(macOS 10.13, *))
+            colortrc = kCVImageBufferTransferFunction_SMPTE_ST_2084_PQ;
+        else
+            colortrc = CFSTR("SMPTE_ST_2084_PQ");
+        break;
+    case AVCOL_TRC_BT2020_10:
+    case AVCOL_TRC_BT2020_12:
+        if (__builtin_available(macOS 10.11, *))
+            colortrc = kCVImageBufferTransferFunction_ITU_R_2020;
+        else
+            colortrc = CFSTR("ITU_R_2020");
+        break;
+    case AVCOL_TRC_BT709:
+        colortrc = kCVImageBufferTransferFunction_ITU_R_709_2;
+        break;
+    case AVCOL_TRC_SMPTE240M:
+        colortrc = kCVImageBufferTransferFunction_SMPTE_240M_1995;
+        break;
+    case AVCOL_TRC_SMPTE428:
+        if (__builtin_available(macOS 10.12, *))
+            colortrc = kCVImageBufferTransferFunction_SMPTE_ST_428_1;
+        else
+            colortrc = CFSTR("SMPTE_ST_428_1");
+        break;
+    case AVCOL_TRC_ARIB_STD_B67:
+        if (__builtin_available(macOS 10.13, *))
+            colortrc = kCVImageBufferTransferFunction_ITU_R_2100_HLG;
+        else
+            colortrc = CFSTR("ITU_R_2100_HLG");
+        break;
+    case AVCOL_TRC_GAMMA22:
+        gamma = 2.2;
+        colortrc = kCVImageBufferTransferFunction_UseGamma;
+        break;
+    case AVCOL_TRC_GAMMA28:
+        gamma = 2.8;
+        colortrc = kCVImageBufferTransferFunction_UseGamma;
+        break;
+    case AVCOL_TRC_UNSPECIFIED:
+        break;
+    default:
+        av_log(hwfc, AV_LOG_WARNING, "Color transfer function %s is not supported.\n", av_color_transfer_name(src->color_trc));
+    }
+
+    if (colormatrix) {
+        CVBufferSetAttachment(
+            pixbuf,
+            kCVImageBufferYCbCrMatrixKey,
+            colormatrix,
+            kCVAttachmentMode_ShouldPropagate);
+    }
+    if (colorpri) {
+        CVBufferSetAttachment(
+            pixbuf,
+            kCVImageBufferColorPrimariesKey,
+            colorpri,
+            kCVAttachmentMode_ShouldPropagate);
+    }
+    if (colortrc) {
+        CVBufferSetAttachment(
+            pixbuf,
+            kCVImageBufferTransferFunctionKey,
+            colortrc,
+            kCVAttachmentMode_ShouldPropagate);
+    }
+    if (gamma != 0) {
+        CFNumberRef gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
+        CVBufferSetAttachment(
+            pixbuf,
+            kCVImageBufferGammaLevelKey,
+            gamma_level,
+            kCVAttachmentMode_ShouldPropagate);
+        CFRelease(gamma_level);
+    }
+
+    return 0;
+}
+
+static int vt_pixbuf_set_attachments(AVHWFramesContext *hwfc,
+                                     CVPixelBufferRef pixbuf, const AVFrame *src)
+{
+    int ret;
+    ret = vt_pixbuf_set_par(hwfc, pixbuf, src);
+    if (ret < 0)
+        return ret;
+    ret = vt_pixbuf_set_colorspace(hwfc, pixbuf, src);
+    if (ret < 0)
+        return ret;
+    ret = vt_pixbuf_set_chromaloc(hwfc, pixbuf, src);
+    if (ret < 0)
+        return ret;
+    return 0;
+}
+
 static int vt_map_frame(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src,
                         int flags)
 {
@@ -223,6 +626,10 @@  static int vt_transfer_data_to(AVHWFramesContext *hwfc,
     if (err)
         goto fail;
 
+    err = vt_pixbuf_set_attachments(hwfc, (CVPixelBufferRef)dst->data[3], src);
+    if (err)
+        goto fail;
+
     err = 0;
 fail:
     av_frame_free(&map);
@@ -244,8 +651,13 @@  const HWContextType ff_hwcontext_type_videotoolbox = {
     .type                 = AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
     .name                 = "videotoolbox",
 
+    .frames_priv_size     = sizeof(VTFramesContext),
+
     .device_create        = vt_device_create,
+    .frames_init          = vt_frames_init,
     .frames_get_buffer    = vt_get_buffer,
+    .frames_get_constraints = vt_frames_get_constraints,
+    .frames_uninit        = vt_frames_uninit,
     .transfer_get_formats = vt_transfer_get_formats,
     .transfer_data_to     = vt_transfer_data_to,
     .transfer_data_from   = vt_transfer_data_from,
diff --git a/libavutil/hwcontext_videotoolbox.h b/libavutil/hwcontext_videotoolbox.h
index 5074d79e68..62cde07c51 100644
--- a/libavutil/hwcontext_videotoolbox.h
+++ b/libavutil/hwcontext_videotoolbox.h
@@ -29,11 +29,14 @@ 
  * @file
  * An API-specific header for AV_HWDEVICE_TYPE_VIDEOTOOLBOX.
  *
- * This API currently does not support frame allocation, as the raw VideoToolbox
- * API does allocation, and FFmpeg itself never has the need to allocate frames.
+ * This API supports frame allocation using a native CVPixelBufferPool
+ * instead of an AVBufferPool.
  *
  * If the API user sets a custom pool, AVHWFramesContext.pool must return
  * AVBufferRefs whose data pointer is a CVImageBufferRef or CVPixelBufferRef.
+ * Note that the underlying CVPixelBuffer could be retained by OS frameworks
+ * depending on application usage, so it is preferable to let CoreVideo manage
+ * the pool using the default implementation.
  *
  * Currently AVHWDeviceContext.hwctx and AVHWFramesContext.hwctx are always
  * NULL.