diff mbox series

[FFmpeg-devel] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive

Message ID 20210214060442.20157-1-mindmark@gmail.com
State New
Headers show
Series [FFmpeg-devel] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Mark Reid Feb. 14, 2021, 6:04 a.m. UTC
From: Mark Reid <mindmark@gmail.com>

Hi,
This patch fixes audio issues I've had with some capture devices. The audio
gets really choppy and stops working. This seems to be because avf_read_packet
stops outputting the audio frames because a video frame happens to be available first.

It is based on the approach used in a patch from ticket #4437:
https://trac.ffmpeg.org/ticket/4437

My approach uses an AVFifoBuffer instead of NSMutableArray and also
outputs the packets in the same order they arrive from AVFoundation.

This should fix tickets #4437 and #4513.


---
 libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
 1 file changed, 124 insertions(+), 36 deletions(-)

Comments

Mark Reid March 1, 2021, 4:30 a.m. UTC | #1
On Sat, Feb 13, 2021 at 10:04 PM <mindmark@gmail.com> wrote:

> From: Mark Reid <mindmark@gmail.com>
>
> Hi,
> This patch fixes audio issues I've had with some capture devices. The audio
> gets really choppy and stops working. This seems to be because
> avf_read_packet
> stops outputting the audio frames because a video frame happens to be
> available first.
>
> It is based on the approach used in a patch from ticket #4437:
> https://trac.ffmpeg.org/ticket/4437
>
> My approach uses an AVFifoBuffer instead of NSMutableArray and also
> outputs the packets in the same order they arrive from AVFoundation.
>
> This should fix tickets #4437 and #4513.
>
>
> ---
>  libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
>  1 file changed, 124 insertions(+), 36 deletions(-)
>
> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
> index 59d5b0af4f..5ac6ec4183 100644
> --- a/libavdevice/avfoundation.m
> +++ b/libavdevice/avfoundation.m
> @@ -31,13 +31,17 @@
>  #include "libavutil/pixdesc.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/avstring.h"
> +#include "libavutil/avassert.h"
>  #include "libavformat/internal.h"
>  #include "libavutil/internal.h"
>  #include "libavutil/parseutils.h"
>  #include "libavutil/time.h"
>  #include "libavutil/imgutils.h"
> +#include "libavutil/fifo.h"
>  #include "avdevice.h"
>
> +#define FIFO_SIZE 4
> +
>  static const int avf_time_base = 1000000;
>
>  static const AVRational avf_time_base_q = {
> @@ -128,8 +132,8 @@ typedef struct
>      AVCaptureSession         *capture_session;
>      AVCaptureVideoDataOutput *video_output;
>      AVCaptureAudioDataOutput *audio_output;
> -    CMSampleBufferRef         current_frame;
> -    CMSampleBufferRef         current_audio_frame;
> +    AVFifoBuffer *video_fifo;
> +    AVFifoBuffer *audio_fifo;
>
>      AVCaptureDevice          *observed_device;
>  #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
> @@ -138,6 +142,11 @@ typedef struct
>      int                      observed_quit;
>  } AVFContext;
>
> +typedef struct {
> +    int64_t ts;
> +    CMSampleBufferRef frame;
> +} BufferRef;
> +
>  static void lock_frames(AVFContext* ctx)
>  {
>      pthread_mutex_lock(&ctx->frame_lock);
> @@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
>      pthread_mutex_unlock(&ctx->frame_lock);
>  }
>
> +static inline void fifo_write(AVFifoBuffer* f, int64_t ts,
> CMSampleBufferRef frame)
> +{
> +    BufferRef buf = {
> +        .ts    = ts,
> +        .frame = frame,
> +    };
> +
> +    CFRetain(frame);
> +    av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
> +}
> +
> +static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
> +{
> +    if (av_fifo_size(f)) {
> +        av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
> +        return;
> +    }
> +    buf->frame = nil;
> +    return;
> +}
> +
> +static inline void fifo_drain(AVFifoBuffer* f, int release)
> +{
> +    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
> +    if (release) {
> +        BufferRef buf;
> +        fifo_peek(f, &buf);
> +        CFRelease(buf.frame);
> +    }
> +    av_fifo_drain(f, sizeof(BufferRef));
> +}
> +
> +static inline void fifo_freep(AVFifoBuffer **f)
> +{
> +    if (f) {
> +        while (av_fifo_size(*f)) {
> +            fifo_drain(*f, 1);
> +        }
> +        av_fifo_freep(f);
> +    }
> +}
> +
>  /** FrameReciever class - delegate for AVCaptureSession
>   */
>  @interface AVFFrameReceiver : NSObject
> @@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
>    didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
>           fromConnection:(AVCaptureConnection *)connection
>  {
> +    AVFifoBuffer *fifo = _context->video_fifo;
> +    int64_t ts = av_gettime_relative();
>      lock_frames(_context);
>
> -    if (_context->current_frame != nil) {
> -        CFRelease(_context->current_frame);
> +    if (av_fifo_space(fifo) == 0) {
> +        av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest
> frame has been dropped\n");
> +        fifo_drain(fifo, 1);
>      }
>
> -    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
> +    fifo_write(fifo, ts, videoFrame);
>
>      unlock_frames(_context);
>
> @@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
>    didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
>           fromConnection:(AVCaptureConnection *)connection
>  {
> +    AVFifoBuffer *fifo = _context->audio_fifo;
> +    int64_t ts = av_gettime_relative();
>      lock_frames(_context);
>
> -    if (_context->current_audio_frame != nil) {
> -        CFRelease(_context->current_audio_frame);
> +    if (!av_fifo_space(fifo)) {
> +        av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest
> frame has been dropped\n");
> +        fifo_drain(fifo, 1);
>      }
>
> -    _context->current_audio_frame =
> (CMSampleBufferRef)CFRetain(audioFrame);
> +    fifo_write(fifo, ts, audioFrame);
>
>      unlock_frames(_context);
>
> @@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
>      ctx->avf_audio_delegate = NULL;
>
>      av_freep(&ctx->audio_buffer);
> +    fifo_freep(&ctx->video_fifo);
> +    fifo_freep(&ctx->audio_fifo);
>
>      pthread_mutex_destroy(&ctx->frame_lock);
> -
> -    if (ctx->current_frame) {
> -        CFRelease(ctx->current_frame);
> -    }
>  }
>
>  static void parse_device_name(AVFormatContext *s)
> @@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s,
> AVCaptureDevice *audio_device)
>  static int get_video_config(AVFormatContext *s)
>  {
>      AVFContext *ctx = (AVFContext*)s->priv_data;
> +    BufferRef buf;
>      CVImageBufferRef image_buffer;
>      CMBlockBufferRef block_buffer;
>      CGSize image_buffer_size;
> @@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)
>
>      avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>
> -    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
> -    block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
> +    fifo_peek(ctx->video_fifo, &buf);
> +    if (buf.frame == nil) {
> +        return 1;
> +    }
> +
> +    image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
> +    block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
>
>      if (image_buffer) {
>          image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
> @@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
>          stream->codecpar->format     = ctx->pixel_format;
>      }
>
> -    CFRelease(ctx->current_frame);
> -    ctx->current_frame = nil;
> -
>      unlock_frames(ctx);
>
>      return 0;
> @@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s)
>  static int get_audio_config(AVFormatContext *s)
>  {
>      AVFContext *ctx = (AVFContext*)s->priv_data;
> +    BufferRef buf;
>      CMFormatDescriptionRef format_desc;
>      AVStream* stream = avformat_new_stream(s, NULL);
>
> @@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)
>
>      avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>
> -    format_desc =
> CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
> +    fifo_peek(ctx->audio_fifo, &buf);
> +    if (buf.frame == nil) {
> +        return 1;
> +    }
> +
> +    format_desc = CMSampleBufferGetFormatDescription(buf.frame);
>      const AudioStreamBasicDescription *basic_desc =
> CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>
>      if (!basic_desc) {
> @@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
>      }
>
>      if (ctx->audio_non_interleaved) {
> -        CMBlockBufferRef block_buffer =
> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> +        CMBlockBufferRef block_buffer =
> CMSampleBufferGetDataBuffer(buf.frame);
>          ctx->audio_buffer_size        =
> CMBlockBufferGetDataLength(block_buffer);
>          ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
>          if (!ctx->audio_buffer) {
> @@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
>          }
>      }
>
> -    CFRelease(ctx->current_audio_frame);
> -    ctx->current_audio_frame = nil;
> -
>      unlock_frames(ctx);
>
>      return 0;
> @@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)
>
>      pthread_mutex_init(&ctx->frame_lock, NULL);
>
> +    ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
> +    ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
> +
>  #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
>      CGGetActiveDisplayList(0, NULL, &num_screens);
>  #endif
> @@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s,
> AVPacket *pkt)
>      AVFContext* ctx = (AVFContext*)s->priv_data;
>
>      do {
> +        BufferRef video;
> +        BufferRef audio;
>          CVImageBufferRef image_buffer;
>          CMBlockBufferRef block_buffer;
>          lock_frames(ctx);
>
> -        if (ctx->current_frame != nil) {
> +        fifo_peek(ctx->video_fifo, &video);
> +        fifo_peek(ctx->audio_fifo, &audio);
> +
> +        if (video.frame != nil && audio.frame != nil) {
> +            // process oldest CMSampleBufferRef first
> +            if (audio.ts <= video.ts) {
> +                video.frame = nil;
> +            } else {
> +                audio.frame = nil;
> +            }
> +        }
> +
> +        if (video.frame != nil) {
>              int status;
>              int length = 0;
>
> -            image_buffer =
> CMSampleBufferGetImageBuffer(ctx->current_frame);
> -            block_buffer =
> CMSampleBufferGetDataBuffer(ctx->current_frame);
> +            fifo_drain(ctx->video_fifo, 0);
> +            unlock_frames(ctx);
> +
> +            image_buffer = CMSampleBufferGetImageBuffer(video.frame);
> +            block_buffer = CMSampleBufferGetDataBuffer(video.frame);
>
>              if (image_buffer != nil) {
>                  length = (int)CVPixelBufferGetDataSize(image_buffer);
>              } else if (block_buffer != nil) {
>                  length = (int)CMBlockBufferGetDataLength(block_buffer);
>              } else  {
> +                CFRelease(video.frame);
>                  return AVERROR(EINVAL);
>              }
>
>              if (av_new_packet(pkt, length) < 0) {
> +                CFRelease(video.frame);
>                  return AVERROR(EIO);
>              }
>
>              CMItemCount count;
>              CMSampleTimingInfo timing_info;
>
> -            if
> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1,
> &timing_info, &count) == noErr) {
> +            if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame,
> 1, &timing_info, &count) == noErr) {
>                  AVRational timebase_q = av_make_q(1,
> timing_info.presentationTimeStamp.timescale);
>                  pkt->pts = pkt->dts =
> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q,
> avf_time_base_q);
>              }
> @@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s,
> AVPacket *pkt)
>                      status = AVERROR(EIO);
>                  }
>               }
> -            CFRelease(ctx->current_frame);
> -            ctx->current_frame = nil;
> +            CFRelease(video.frame);
>
> -            if (status < 0)
> +            if (status < 0) {
>                  return status;
> -        } else if (ctx->current_audio_frame != nil) {
> -            CMBlockBufferRef block_buffer =
> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> +            }
> +        } else if (audio.frame != nil) {
> +            CMBlockBufferRef block_buffer =
> CMSampleBufferGetDataBuffer(audio.frame);
>              int block_buffer_size         =
> CMBlockBufferGetDataLength(block_buffer);
>
> +            fifo_drain(ctx->audio_fifo, 0);
> +            unlock_frames(ctx);
> +
>              if (!block_buffer || !block_buffer_size) {
> +                CFRelease(audio.frame);
>                  return AVERROR(EIO);
>              }
>
>              if (ctx->audio_non_interleaved && block_buffer_size >
> ctx->audio_buffer_size) {
> +                CFRelease(audio.frame);
>                  return AVERROR_BUFFER_TOO_SMALL;
>              }
>
>              if (av_new_packet(pkt, block_buffer_size) < 0) {
> +                CFRelease(audio.frame);
>                  return AVERROR(EIO);
>              }
>
>              CMItemCount count;
>              CMSampleTimingInfo timing_info;
>
> -            if
> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1,
> &timing_info, &count) == noErr) {
> +            if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame,
> 1, &timing_info, &count) == noErr) {
>                  AVRational timebase_q = av_make_q(1,
> timing_info.presentationTimeStamp.timescale);
>                  pkt->pts = pkt->dts =
> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q,
> avf_time_base_q);
>              }
> @@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s,
> AVPacket *pkt)
>
>                  OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer,
> 0, pkt->size, ctx->audio_buffer);
>                  if (ret != kCMBlockBufferNoErr) {
> +                    CFRelease(audio.frame);
>                      return AVERROR(EIO);
>                  }
>
> @@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s,
> AVPacket *pkt)
>              } else {
>                  OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer,
> 0, pkt->size, pkt->data);
>                  if (ret != kCMBlockBufferNoErr) {
> +                    CFRelease(audio.frame);
>                      return AVERROR(EIO);
>                  }
>              }
>
> -            CFRelease(ctx->current_audio_frame);
> -            ctx->current_audio_frame = nil;
> +            CFRelease(audio.frame);
>          } else {
>              pkt->data = NULL;
>              unlock_frames(ctx);
> @@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s,
> AVPacket *pkt)
>                  return AVERROR(EAGAIN);
>              }
>          }
> -
> -        unlock_frames(ctx);
>      } while (!pkt->data);
>
>      return 0;
> --
> 2.29.2
>
>
ping
Mark Reid March 13, 2021, 8:45 p.m. UTC | #2
On Sun., Feb. 28, 2021, 8:30 p.m. Mark Reid, <mindmark@gmail.com> wrote:

>
>
> On Sat, Feb 13, 2021 at 10:04 PM <mindmark@gmail.com> wrote:
>
>> From: Mark Reid <mindmark@gmail.com>
>>
>> Hi,
>> This patch fixes audio issues I've had with some capture devices. The
>> audio
>> gets really choppy and stops working. This seems to be because
>> avf_read_packet
>> stops outputting the audio frames because a video frame happens to be
>> available first.
>>
>> It is based on the approach used in a patch from ticket #4437:
>> https://trac.ffmpeg.org/ticket/4437
>>
>> My approach uses an AVFifoBuffer instead of NSMutableArray and also
>> outputs the packets in the same order they arrive from AVFoundation.
>>
>> This should fix tickets #4437 and #4513.
>>
>>
>> ---
>>  libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
>>  1 file changed, 124 insertions(+), 36 deletions(-)
>>
>> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
>> index 59d5b0af4f..5ac6ec4183 100644
>> --- a/libavdevice/avfoundation.m
>> +++ b/libavdevice/avfoundation.m
>> @@ -31,13 +31,17 @@
>>  #include "libavutil/pixdesc.h"
>>  #include "libavutil/opt.h"
>>  #include "libavutil/avstring.h"
>> +#include "libavutil/avassert.h"
>>  #include "libavformat/internal.h"
>>  #include "libavutil/internal.h"
>>  #include "libavutil/parseutils.h"
>>  #include "libavutil/time.h"
>>  #include "libavutil/imgutils.h"
>> +#include "libavutil/fifo.h"
>>  #include "avdevice.h"
>>
>> +#define FIFO_SIZE 4
>> +
>>  static const int avf_time_base = 1000000;
>>
>>  static const AVRational avf_time_base_q = {
>> @@ -128,8 +132,8 @@ typedef struct
>>      AVCaptureSession         *capture_session;
>>      AVCaptureVideoDataOutput *video_output;
>>      AVCaptureAudioDataOutput *audio_output;
>> -    CMSampleBufferRef         current_frame;
>> -    CMSampleBufferRef         current_audio_frame;
>> +    AVFifoBuffer *video_fifo;
>> +    AVFifoBuffer *audio_fifo;
>>
>>      AVCaptureDevice          *observed_device;
>>  #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
>> @@ -138,6 +142,11 @@ typedef struct
>>      int                      observed_quit;
>>  } AVFContext;
>>
>> +typedef struct {
>> +    int64_t ts;
>> +    CMSampleBufferRef frame;
>> +} BufferRef;
>> +
>>  static void lock_frames(AVFContext* ctx)
>>  {
>>      pthread_mutex_lock(&ctx->frame_lock);
>> @@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
>>      pthread_mutex_unlock(&ctx->frame_lock);
>>  }
>>
>> +static inline void fifo_write(AVFifoBuffer* f, int64_t ts,
>> CMSampleBufferRef frame)
>> +{
>> +    BufferRef buf = {
>> +        .ts    = ts,
>> +        .frame = frame,
>> +    };
>> +
>> +    CFRetain(frame);
>> +    av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
>> +}
>> +
>> +static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
>> +{
>> +    if (av_fifo_size(f)) {
>> +        av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
>> +        return;
>> +    }
>> +    buf->frame = nil;
>> +    return;
>> +}
>> +
>> +static inline void fifo_drain(AVFifoBuffer* f, int release)
>> +{
>> +    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
>> +    if (release) {
>> +        BufferRef buf;
>> +        fifo_peek(f, &buf);
>> +        CFRelease(buf.frame);
>> +    }
>> +    av_fifo_drain(f, sizeof(BufferRef));
>> +}
>> +
>> +static inline void fifo_freep(AVFifoBuffer **f)
>> +{
>> +    if (f) {
>> +        while (av_fifo_size(*f)) {
>> +            fifo_drain(*f, 1);
>> +        }
>> +        av_fifo_freep(f);
>> +    }
>> +}
>> +
>>  /** FrameReciever class - delegate for AVCaptureSession
>>   */
>>  @interface AVFFrameReceiver : NSObject
>> @@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
>>    didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
>>           fromConnection:(AVCaptureConnection *)connection
>>  {
>> +    AVFifoBuffer *fifo = _context->video_fifo;
>> +    int64_t ts = av_gettime_relative();
>>      lock_frames(_context);
>>
>> -    if (_context->current_frame != nil) {
>> -        CFRelease(_context->current_frame);
>> +    if (av_fifo_space(fifo) == 0) {
>> +        av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest
>> frame has been dropped\n");
>> +        fifo_drain(fifo, 1);
>>      }
>>
>> -    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
>> +    fifo_write(fifo, ts, videoFrame);
>>
>>      unlock_frames(_context);
>>
>> @@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
>>    didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
>>           fromConnection:(AVCaptureConnection *)connection
>>  {
>> +    AVFifoBuffer *fifo = _context->audio_fifo;
>> +    int64_t ts = av_gettime_relative();
>>      lock_frames(_context);
>>
>> -    if (_context->current_audio_frame != nil) {
>> -        CFRelease(_context->current_audio_frame);
>> +    if (!av_fifo_space(fifo)) {
>> +        av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest
>> frame has been dropped\n");
>> +        fifo_drain(fifo, 1);
>>      }
>>
>> -    _context->current_audio_frame =
>> (CMSampleBufferRef)CFRetain(audioFrame);
>> +    fifo_write(fifo, ts, audioFrame);
>>
>>      unlock_frames(_context);
>>
>> @@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
>>      ctx->avf_audio_delegate = NULL;
>>
>>      av_freep(&ctx->audio_buffer);
>> +    fifo_freep(&ctx->video_fifo);
>> +    fifo_freep(&ctx->audio_fifo);
>>
>>      pthread_mutex_destroy(&ctx->frame_lock);
>> -
>> -    if (ctx->current_frame) {
>> -        CFRelease(ctx->current_frame);
>> -    }
>>  }
>>
>>  static void parse_device_name(AVFormatContext *s)
>> @@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s,
>> AVCaptureDevice *audio_device)
>>  static int get_video_config(AVFormatContext *s)
>>  {
>>      AVFContext *ctx = (AVFContext*)s->priv_data;
>> +    BufferRef buf;
>>      CVImageBufferRef image_buffer;
>>      CMBlockBufferRef block_buffer;
>>      CGSize image_buffer_size;
>> @@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)
>>
>>      avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>>
>> -    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
>> -    block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
>> +    fifo_peek(ctx->video_fifo, &buf);
>> +    if (buf.frame == nil) {
>> +        return 1;
>> +    }
>> +
>> +    image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
>> +    block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
>>
>>      if (image_buffer) {
>>          image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
>> @@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
>>          stream->codecpar->format     = ctx->pixel_format;
>>      }
>>
>> -    CFRelease(ctx->current_frame);
>> -    ctx->current_frame = nil;
>> -
>>      unlock_frames(ctx);
>>
>>      return 0;
>> @@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s)
>>  static int get_audio_config(AVFormatContext *s)
>>  {
>>      AVFContext *ctx = (AVFContext*)s->priv_data;
>> +    BufferRef buf;
>>      CMFormatDescriptionRef format_desc;
>>      AVStream* stream = avformat_new_stream(s, NULL);
>>
>> @@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)
>>
>>      avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>>
>> -    format_desc =
>> CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
>> +    fifo_peek(ctx->audio_fifo, &buf);
>> +    if (buf.frame == nil) {
>> +        return 1;
>> +    }
>> +
>> +    format_desc = CMSampleBufferGetFormatDescription(buf.frame);
>>      const AudioStreamBasicDescription *basic_desc =
>> CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>>
>>      if (!basic_desc) {
>> @@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
>>      }
>>
>>      if (ctx->audio_non_interleaved) {
>> -        CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> +        CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(buf.frame);
>>          ctx->audio_buffer_size        =
>> CMBlockBufferGetDataLength(block_buffer);
>>          ctx->audio_buffer             =
>> av_malloc(ctx->audio_buffer_size);
>>          if (!ctx->audio_buffer) {
>> @@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
>>          }
>>      }
>>
>> -    CFRelease(ctx->current_audio_frame);
>> -    ctx->current_audio_frame = nil;
>> -
>>      unlock_frames(ctx);
>>
>>      return 0;
>> @@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)
>>
>>      pthread_mutex_init(&ctx->frame_lock, NULL);
>>
>> +    ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
>> +    ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
>> +
>>  #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
>>      CGGetActiveDisplayList(0, NULL, &num_screens);
>>  #endif
>> @@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>      AVFContext* ctx = (AVFContext*)s->priv_data;
>>
>>      do {
>> +        BufferRef video;
>> +        BufferRef audio;
>>          CVImageBufferRef image_buffer;
>>          CMBlockBufferRef block_buffer;
>>          lock_frames(ctx);
>>
>> -        if (ctx->current_frame != nil) {
>> +        fifo_peek(ctx->video_fifo, &video);
>> +        fifo_peek(ctx->audio_fifo, &audio);
>> +
>> +        if (video.frame != nil && audio.frame != nil) {
>> +            // process oldest CMSampleBufferRef first
>> +            if (audio.ts <= video.ts) {
>> +                video.frame = nil;
>> +            } else {
>> +                audio.frame = nil;
>> +            }
>> +        }
>> +
>> +        if (video.frame != nil) {
>>              int status;
>>              int length = 0;
>>
>> -            image_buffer =
>> CMSampleBufferGetImageBuffer(ctx->current_frame);
>> -            block_buffer =
>> CMSampleBufferGetDataBuffer(ctx->current_frame);
>> +            fifo_drain(ctx->video_fifo, 0);
>> +            unlock_frames(ctx);
>> +
>> +            image_buffer = CMSampleBufferGetImageBuffer(video.frame);
>> +            block_buffer = CMSampleBufferGetDataBuffer(video.frame);
>>
>>              if (image_buffer != nil) {
>>                  length = (int)CVPixelBufferGetDataSize(image_buffer);
>>              } else if (block_buffer != nil) {
>>                  length = (int)CMBlockBufferGetDataLength(block_buffer);
>>              } else  {
>> +                CFRelease(video.frame);
>>                  return AVERROR(EINVAL);
>>              }
>>
>>              if (av_new_packet(pkt, length) < 0) {
>> +                CFRelease(video.frame);
>>                  return AVERROR(EIO);
>>              }
>>
>>              CMItemCount count;
>>              CMSampleTimingInfo timing_info;
>>
>> -            if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1,
>> &timing_info, &count) == noErr) {
>> +            if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info,
>> &count) == noErr) {
>>                  AVRational timebase_q = av_make_q(1,
>> timing_info.presentationTimeStamp.timescale);
>>                  pkt->pts = pkt->dts =
>> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q,
>> avf_time_base_q);
>>              }
>> @@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>                      status = AVERROR(EIO);
>>                  }
>>               }
>> -            CFRelease(ctx->current_frame);
>> -            ctx->current_frame = nil;
>> +            CFRelease(video.frame);
>>
>> -            if (status < 0)
>> +            if (status < 0) {
>>                  return status;
>> -        } else if (ctx->current_audio_frame != nil) {
>> -            CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> +            }
>> +        } else if (audio.frame != nil) {
>> +            CMBlockBufferRef block_buffer =
>> CMSampleBufferGetDataBuffer(audio.frame);
>>              int block_buffer_size         =
>> CMBlockBufferGetDataLength(block_buffer);
>>
>> +            fifo_drain(ctx->audio_fifo, 0);
>> +            unlock_frames(ctx);
>> +
>>              if (!block_buffer || !block_buffer_size) {
>> +                CFRelease(audio.frame);
>>                  return AVERROR(EIO);
>>              }
>>
>>              if (ctx->audio_non_interleaved && block_buffer_size >
>> ctx->audio_buffer_size) {
>> +                CFRelease(audio.frame);
>>                  return AVERROR_BUFFER_TOO_SMALL;
>>              }
>>
>>              if (av_new_packet(pkt, block_buffer_size) < 0) {
>> +                CFRelease(audio.frame);
>>                  return AVERROR(EIO);
>>              }
>>
>>              CMItemCount count;
>>              CMSampleTimingInfo timing_info;
>>
>> -            if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1,
>> &timing_info, &count) == noErr) {
>> +            if
>> (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info,
>> &count) == noErr) {
>>                  AVRational timebase_q = av_make_q(1,
>> timing_info.presentationTimeStamp.timescale);
>>                  pkt->pts = pkt->dts =
>> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q,
>> avf_time_base_q);
>>              }
>> @@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>
>>                  OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer,
>> 0, pkt->size, ctx->audio_buffer);
>>                  if (ret != kCMBlockBufferNoErr) {
>> +                    CFRelease(audio.frame);
>>                      return AVERROR(EIO);
>>                  }
>>
>> @@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>              } else {
>>                  OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer,
>> 0, pkt->size, pkt->data);
>>                  if (ret != kCMBlockBufferNoErr) {
>> +                    CFRelease(audio.frame);
>>                      return AVERROR(EIO);
>>                  }
>>              }
>>
>> -            CFRelease(ctx->current_audio_frame);
>> -            ctx->current_audio_frame = nil;
>> +            CFRelease(audio.frame);
>>          } else {
>>              pkt->data = NULL;
>>              unlock_frames(ctx);
>> @@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>                  return AVERROR(EAGAIN);
>>              }
>>          }
>> -
>> -        unlock_frames(ctx);
>>      } while (!pkt->data);
>>
>>      return 0;
>> --
>> 2.29.2
>>
>>
> ping
>

ping

>
diff mbox series

Patch

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 59d5b0af4f..5ac6ec4183 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -31,13 +31,17 @@ 
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
 #include "libavformat/internal.h"
 #include "libavutil/internal.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/fifo.h"
 #include "avdevice.h"
 
+#define FIFO_SIZE 4
+
 static const int avf_time_base = 1000000;
 
 static const AVRational avf_time_base_q = {
@@ -128,8 +132,8 @@  typedef struct
     AVCaptureSession         *capture_session;
     AVCaptureVideoDataOutput *video_output;
     AVCaptureAudioDataOutput *audio_output;
-    CMSampleBufferRef         current_frame;
-    CMSampleBufferRef         current_audio_frame;
+    AVFifoBuffer *video_fifo;
+    AVFifoBuffer *audio_fifo;
 
     AVCaptureDevice          *observed_device;
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
@@ -138,6 +142,11 @@  typedef struct
     int                      observed_quit;
 } AVFContext;
 
+typedef struct {
+    int64_t ts;
+    CMSampleBufferRef frame;
+} BufferRef;
+
 static void lock_frames(AVFContext* ctx)
 {
     pthread_mutex_lock(&ctx->frame_lock);
@@ -148,6 +157,48 @@  static void unlock_frames(AVFContext* ctx)
     pthread_mutex_unlock(&ctx->frame_lock);
 }
 
+static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef frame)
+{
+    BufferRef buf = {
+        .ts    = ts,
+        .frame = frame,
+    };
+
+    CFRetain(frame);
+    av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
+}
+
+static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
+{
+    if (av_fifo_size(f)) {
+        av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
+        return;
+    }
+    buf->frame = nil;
+    return;
+}
+
+static inline void fifo_drain(AVFifoBuffer* f, int release)
+{
+    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
+    if (release) {
+        BufferRef buf;
+        fifo_peek(f, &buf);
+        CFRelease(buf.frame);
+    }
+    av_fifo_drain(f, sizeof(BufferRef));
+}
+
+static inline void fifo_freep(AVFifoBuffer **f)
+{
+    if (f) {
+        while (av_fifo_size(*f)) {
+            fifo_drain(*f, 1);
+        }
+        av_fifo_freep(f);
+    }
+}
+
 /** FrameReciever class - delegate for AVCaptureSession
  */
 @interface AVFFrameReceiver : NSObject
@@ -225,13 +276,16 @@  static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->video_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);
 
-    if (_context->current_frame != nil) {
-        CFRelease(_context->current_frame);
+    if (av_fifo_space(fifo) == 0) {
+        av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }
 
-    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
+    fifo_write(fifo, ts, videoFrame);
 
     unlock_frames(_context);
 
@@ -269,13 +323,16 @@  static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->audio_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);
 
-    if (_context->current_audio_frame != nil) {
-        CFRelease(_context->current_audio_frame);
+    if (!av_fifo_space(fifo)) {
+        av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }
 
-    _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
+    fifo_write(fifo, ts, audioFrame);
 
     unlock_frames(_context);
 
@@ -301,12 +358,10 @@  static void destroy_context(AVFContext* ctx)
     ctx->avf_audio_delegate = NULL;
 
     av_freep(&ctx->audio_buffer);
+    fifo_freep(&ctx->video_fifo);
+    fifo_freep(&ctx->audio_fifo);
 
     pthread_mutex_destroy(&ctx->frame_lock);
-
-    if (ctx->current_frame) {
-        CFRelease(ctx->current_frame);
-    }
 }
 
 static void parse_device_name(AVFormatContext *s)
@@ -624,6 +679,7 @@  static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
 static int get_video_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CVImageBufferRef image_buffer;
     CMBlockBufferRef block_buffer;
     CGSize image_buffer_size;
@@ -644,8 +700,13 @@  static int get_video_config(AVFormatContext *s)
 
     avpriv_set_pts_info(stream, 64, 1, avf_time_base);
 
-    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-    block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+    fifo_peek(ctx->video_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
+    block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
 
     if (image_buffer) {
         image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
@@ -661,9 +722,6 @@  static int get_video_config(AVFormatContext *s)
         stream->codecpar->format     = ctx->pixel_format;
     }
 
-    CFRelease(ctx->current_frame);
-    ctx->current_frame = nil;
-
     unlock_frames(ctx);
 
     return 0;
@@ -672,6 +730,7 @@  static int get_video_config(AVFormatContext *s)
 static int get_audio_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CMFormatDescriptionRef format_desc;
     AVStream* stream = avformat_new_stream(s, NULL);
 
@@ -690,7 +749,12 @@  static int get_audio_config(AVFormatContext *s)
 
     avpriv_set_pts_info(stream, 64, 1, avf_time_base);
 
-    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
+    fifo_peek(ctx->audio_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    format_desc = CMSampleBufferGetFormatDescription(buf.frame);
     const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
 
     if (!basic_desc) {
@@ -737,7 +801,7 @@  static int get_audio_config(AVFormatContext *s)
     }
 
     if (ctx->audio_non_interleaved) {
-        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
         ctx->audio_buffer_size        = CMBlockBufferGetDataLength(block_buffer);
         ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
         if (!ctx->audio_buffer) {
@@ -746,9 +810,6 @@  static int get_audio_config(AVFormatContext *s)
         }
     }
 
-    CFRelease(ctx->current_audio_frame);
-    ctx->current_audio_frame = nil;
-
     unlock_frames(ctx);
 
     return 0;
@@ -771,6 +832,9 @@  static int avf_read_header(AVFormatContext *s)
 
     pthread_mutex_init(&ctx->frame_lock, NULL);
 
+    ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+    ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
     CGGetActiveDisplayList(0, NULL, &num_screens);
 #endif
@@ -1051,33 +1115,52 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
     AVFContext* ctx = (AVFContext*)s->priv_data;
 
     do {
+        BufferRef video;
+        BufferRef audio;
         CVImageBufferRef image_buffer;
         CMBlockBufferRef block_buffer;
         lock_frames(ctx);
 
-        if (ctx->current_frame != nil) {
+        fifo_peek(ctx->video_fifo, &video);
+        fifo_peek(ctx->audio_fifo, &audio);
+
+        if (video.frame != nil && audio.frame != nil) {
+            // process oldest CMSampleBufferRef first
+            if (audio.ts <= video.ts) {
+                video.frame = nil;
+            } else {
+                audio.frame = nil;
+            }
+        }
+
+        if (video.frame != nil) {
             int status;
             int length = 0;
 
-            image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-            block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+            fifo_drain(ctx->video_fifo, 0);
+            unlock_frames(ctx);
+
+            image_buffer = CMSampleBufferGetImageBuffer(video.frame);
+            block_buffer = CMSampleBufferGetDataBuffer(video.frame);
 
             if (image_buffer != nil) {
                 length = (int)CVPixelBufferGetDataSize(image_buffer);
             } else if (block_buffer != nil) {
                 length = (int)CMBlockBufferGetDataLength(block_buffer);
             } else  {
+                CFRelease(video.frame);
                 return AVERROR(EINVAL);
             }
 
             if (av_new_packet(pkt, length) < 0) {
+                CFRelease(video.frame);
                 return AVERROR(EIO);
             }
 
             CMItemCount count;
             CMSampleTimingInfo timing_info;
 
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1094,31 +1177,37 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                     status = AVERROR(EIO);
                 }
              }
-            CFRelease(ctx->current_frame);
-            ctx->current_frame = nil;
+            CFRelease(video.frame);
 
-            if (status < 0)
+            if (status < 0) {
                 return status;
-        } else if (ctx->current_audio_frame != nil) {
-            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+            }
+        } else if (audio.frame != nil) {
+            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(audio.frame);
             int block_buffer_size         = CMBlockBufferGetDataLength(block_buffer);
 
+            fifo_drain(ctx->audio_fifo, 0);
+            unlock_frames(ctx);
+
             if (!block_buffer || !block_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
 
             if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR_BUFFER_TOO_SMALL;
             }
 
             if (av_new_packet(pkt, block_buffer_size) < 0) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
 
             CMItemCount count;
             CMSampleTimingInfo timing_info;
 
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1131,6 +1220,7 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
 
                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(audio.frame);
                     return AVERROR(EIO);
                 }
 
@@ -1162,12 +1252,12 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
             } else {
                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(audio.frame);
                     return AVERROR(EIO);
                 }
             }
 
-            CFRelease(ctx->current_audio_frame);
-            ctx->current_audio_frame = nil;
+            CFRelease(audio.frame);
         } else {
             pkt->data = NULL;
             unlock_frames(ctx);
@@ -1177,8 +1267,6 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                 return AVERROR(EAGAIN);
             }
         }
-
-        unlock_frames(ctx);
     } while (!pkt->data);
 
     return 0;