| Message ID | 20210214060442.20157-1-mindmark@gmail.com |
|---|---|
| State | New |
| Series | [FFmpeg-devel] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive |
| Context | Check | Description |
|---|---|---|
| andriy/x86_make | success | Make finished |
| andriy/x86_make_fate | success | Make fate finished |
| andriy/PPC64_make | success | Make finished |
| andriy/PPC64_make_fate | success | Make fate finished |
On Sat, Feb 13, 2021 at 10:04 PM <mindmark@gmail.com> wrote:

> [... full patch snipped; see the cover letter and diff below ...]

ping
On Sun., Feb. 28, 2021, 8:30 p.m. Mark Reid, <mindmark@gmail.com> wrote:

> On Sat, Feb 13, 2021 at 10:04 PM <mindmark@gmail.com> wrote:
>> [... full patch snipped; see the cover letter and diff below ...]
>
> ping

ping
```diff
diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 59d5b0af4f..5ac6ec4183 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -31,13 +31,17 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
 #include "libavformat/internal.h"
 #include "libavutil/internal.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/fifo.h"
 #include "avdevice.h"
 
+#define FIFO_SIZE 4
+
 static const int avf_time_base = 1000000;
 
 static const AVRational avf_time_base_q = {
@@ -128,8 +132,8 @@ typedef struct
     AVCaptureSession         *capture_session;
     AVCaptureVideoDataOutput *video_output;
     AVCaptureAudioDataOutput *audio_output;
-    CMSampleBufferRef         current_frame;
-    CMSampleBufferRef         current_audio_frame;
+    AVFifoBuffer             *video_fifo;
+    AVFifoBuffer             *audio_fifo;
 
     AVCaptureDevice          *observed_device;
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
@@ -138,6 +142,11 @@ typedef struct
     int observed_quit;
 } AVFContext;
 
+typedef struct {
+    int64_t ts;
+    CMSampleBufferRef frame;
+} BufferRef;
+
 static void lock_frames(AVFContext* ctx)
 {
     pthread_mutex_lock(&ctx->frame_lock);
@@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
     pthread_mutex_unlock(&ctx->frame_lock);
 }
 
+static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef frame)
+{
+    BufferRef buf = {
+        .ts = ts,
+        .frame = frame,
+    };
+
+    CFRetain(frame);
+    av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
+}
+
+static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
+{
+    if (av_fifo_size(f)) {
+        av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
+        return;
+    }
+    buf->frame = nil;
+    return;
+}
+
+static inline void fifo_drain(AVFifoBuffer* f, int release)
+{
+    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
+    if (release) {
+        BufferRef buf;
+        fifo_peek(f, &buf);
+        CFRelease(buf.frame);
+    }
+    av_fifo_drain(f, sizeof(BufferRef));
+}
+
+static inline void fifo_freep(AVFifoBuffer **f)
+{
+    if (f) {
+        while (av_fifo_size(*f)) {
+            fifo_drain(*f, 1);
+        }
+        av_fifo_freep(f);
+    }
+}
+
 /** FrameReciever class - delegate for AVCaptureSession
 */
 @interface AVFFrameReceiver : NSObject
@@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->video_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);
 
-    if (_context->current_frame != nil) {
-        CFRelease(_context->current_frame);
+    if (av_fifo_space(fifo) == 0) {
+        av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }
 
-    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
+    fifo_write(fifo, ts, videoFrame);
 
     unlock_frames(_context);
 
@@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->audio_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);
 
-    if (_context->current_audio_frame != nil) {
-        CFRelease(_context->current_audio_frame);
+    if (!av_fifo_space(fifo)) {
+        av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }
 
-    _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
+    fifo_write(fifo, ts, audioFrame);
 
     unlock_frames(_context);
 
@@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
     ctx->avf_audio_delegate = NULL;
 
     av_freep(&ctx->audio_buffer);
+    fifo_freep(&ctx->video_fifo);
+    fifo_freep(&ctx->audio_fifo);
 
     pthread_mutex_destroy(&ctx->frame_lock);
-
-    if (ctx->current_frame) {
-        CFRelease(ctx->current_frame);
-    }
 }
 
 static void parse_device_name(AVFormatContext *s)
@@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
 static int get_video_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CVImageBufferRef image_buffer;
     CMBlockBufferRef block_buffer;
     CGSize image_buffer_size;
@@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)
 
     avpriv_set_pts_info(stream, 64, 1, avf_time_base);
 
-    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-    block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+    fifo_peek(ctx->video_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
+    block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
 
     if (image_buffer) {
         image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
@@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
         stream->codecpar->format     = ctx->pixel_format;
     }
 
-    CFRelease(ctx->current_frame);
-    ctx->current_frame = nil;
-
     unlock_frames(ctx);
 
     return 0;
@@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s)
 static int get_audio_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CMFormatDescriptionRef format_desc;
     AVStream* stream = avformat_new_stream(s, NULL);
 
@@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)
 
     avpriv_set_pts_info(stream, 64, 1, avf_time_base);
 
-    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
+    fifo_peek(ctx->audio_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    format_desc = CMSampleBufferGetFormatDescription(buf.frame);
     const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
 
     if (!basic_desc) {
@@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
     }
 
     if (ctx->audio_non_interleaved) {
-        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
         ctx->audio_buffer_size        = CMBlockBufferGetDataLength(block_buffer);
         ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
         if (!ctx->audio_buffer) {
@@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
         }
     }
 
-    CFRelease(ctx->current_audio_frame);
-    ctx->current_audio_frame = nil;
-
     unlock_frames(ctx);
 
     return 0;
@@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)
 
     pthread_mutex_init(&ctx->frame_lock, NULL);
 
+    ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+    ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
     CGGetActiveDisplayList(0, NULL, &num_screens);
 #endif
@@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
     AVFContext* ctx = (AVFContext*)s->priv_data;
 
     do {
+        BufferRef video;
+        BufferRef audio;
         CVImageBufferRef image_buffer;
        CMBlockBufferRef block_buffer;
         lock_frames(ctx);
 
-        if (ctx->current_frame != nil) {
+        fifo_peek(ctx->video_fifo, &video);
+        fifo_peek(ctx->audio_fifo, &audio);
+
+        if (video.frame != nil && audio.frame != nil) {
+            // process oldest CMSampleBufferRef first
+            if (audio.ts <= video.ts) {
+                video.frame = nil;
+            } else {
+                audio.frame = nil;
+            }
+        }
+
+        if (video.frame != nil) {
             int status;
             int length = 0;
 
-            image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-            block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+            fifo_drain(ctx->video_fifo, 0);
+            unlock_frames(ctx);
+
+            image_buffer = CMSampleBufferGetImageBuffer(video.frame);
+            block_buffer = CMSampleBufferGetDataBuffer(video.frame);
 
             if (image_buffer != nil) {
                 length = (int)CVPixelBufferGetDataSize(image_buffer);
             } else if (block_buffer != nil) {
                 length = (int)CMBlockBufferGetDataLength(block_buffer);
             } else {
+                CFRelease(video.frame);
                 return AVERROR(EINVAL);
             }
 
             if (av_new_packet(pkt, length) < 0) {
+                CFRelease(video.frame);
                 return AVERROR(EIO);
             }
 
             CMItemCount count;
             CMSampleTimingInfo timing_info;
 
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                     status = AVERROR(EIO);
                 }
             }
-            CFRelease(ctx->current_frame);
-            ctx->current_frame = nil;
+            CFRelease(video.frame);
 
-            if (status < 0)
+            if (status < 0) {
                 return status;
-        } else if (ctx->current_audio_frame != nil) {
-            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+            }
+        } else if (audio.frame != nil) {
+            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(audio.frame);
             int block_buffer_size         = CMBlockBufferGetDataLength(block_buffer);
 
+            fifo_drain(ctx->audio_fifo, 0);
+            unlock_frames(ctx);
+
             if (!block_buffer || !block_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
 
             if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR_BUFFER_TOO_SMALL;
             }
 
             if (av_new_packet(pkt, block_buffer_size) < 0) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
 
             CMItemCount count;
             CMSampleTimingInfo timing_info;
 
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
 
                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(audio.frame);
                     return AVERROR(EIO);
                 }
 
@@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
             } else {
                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(audio.frame);
                     return AVERROR(EIO);
                 }
             }
 
-            CFRelease(ctx->current_audio_frame);
-            ctx->current_audio_frame = nil;
+            CFRelease(audio.frame);
         } else {
             pkt->data = NULL;
             unlock_frames(ctx);
@@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                 return AVERROR(EAGAIN);
             }
         }
-
-        unlock_frames(ctx);
     } while (!pkt->data);
 
     return 0;
```
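One detail that makes the diff easier to read: the AVFifoBuffer API of this libavutil generation is byte-oriented, which is why every helper in the patch writes, peeks, and drains in sizeof(BufferRef) units, and why fifo_drain asserts av_fifo_size(f) >= sizeof(BufferRef). Below is a minimal sketch of that bookkeeping, assuming only libavutil; the fifo_demo harness is hypothetical, and a plain void * stands in for the retained CMSampleBufferRef so it compiles without AVFoundation.

```c
#include <stdint.h>
#include "libavutil/fifo.h"

#define FIFO_SIZE 4

/* Mirrors the patch's BufferRef; frame is a CMSampleBufferRef in the real code. */
typedef struct {
    int64_t ts;      /* arrival time, e.g. from av_gettime_relative() */
    void   *frame;   /* stand-in payload pointer */
} BufferRef;

int fifo_demo(void)
{
    /* Capacity is FIFO_SIZE * sizeof(BufferRef) bytes, i.e. 4 records. */
    AVFifoBuffer *f = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
    if (!f)
        return -1;

    BufferRef rec = { .ts = 0, .frame = NULL };
    av_fifo_generic_write(f, &rec, sizeof(BufferRef), NULL);

    /* Both calls return byte counts, hence the sizeof() arithmetic. */
    int buffered   = av_fifo_size(f)  / sizeof(BufferRef);  /* 1 */
    int free_slots = av_fifo_space(f) / sizeof(BufferRef);  /* 3 */

    av_fifo_drain(f, sizeof(BufferRef));  /* discard one whole record */
    av_fifo_freep(&f);
    return buffered + free_slots;         /* 4 */
}
```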
From: Mark Reid <mindmark@gmail.com>

Hi,
This patch fixes audio issues I've had with some capture devices: the audio
gets really choppy and then stops working. This seems to happen because
avf_read_packet stops outputting audio frames whenever a video frame happens
to be available first.

It is based on the approach used in a patch from ticket #4437
(https://trac.ffmpeg.org/ticket/4437).

My approach uses an AVFifoBuffer instead of an NSMutableArray, and also
outputs the packets in the same order they arrive from AVFoundation.

Should fix tickets #4437 and #4513.

---
 libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
 1 file changed, 124 insertions(+), 36 deletions(-)
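To make the ordering claim above concrete, here is a small, self-contained sketch of how avf_read_packet now picks between the two FIFOs: peek both heads and service whichever record arrived first, with audio winning ties, as in the patch's audio.ts <= video.ts comparison. This is an illustration, not code from the patch: the main() harness, the push/peek helper names, and the plain-pointer payload are all invented for the example.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include "libavutil/fifo.h"

typedef struct {
    int64_t ts;
    void   *frame;   /* CMSampleBufferRef in the real code */
} Rec;

/* Peek the head record; rec->frame is NULL when the FIFO is empty. */
static void peek(AVFifoBuffer *f, Rec *rec)
{
    rec->frame = NULL;
    if (av_fifo_size(f) >= (int)sizeof(Rec))
        av_fifo_generic_peek(f, rec, sizeof(Rec), NULL);
}

/* Bounded write: when the FIFO is full, drop the oldest record first. */
static void push(AVFifoBuffer *f, int64_t ts, void *frame)
{
    Rec r = { .ts = ts, .frame = frame };
    if (av_fifo_space(f) == 0)
        av_fifo_drain(f, sizeof(Rec));
    av_fifo_generic_write(f, &r, sizeof(Rec), NULL);
}

int main(void)
{
    AVFifoBuffer *video = av_fifo_alloc_array(4, sizeof(Rec));
    AVFifoBuffer *audio = av_fifo_alloc_array(4, sizeof(Rec));
    int v, a;  /* dummy payloads; only their addresses are used */

    /* Interleaved arrivals: audio at t=1 and t=3, video at t=2. */
    push(audio, 1, &a);
    push(video, 2, &v);
    push(audio, 3, &a);

    /* Drain in arrival order; neither stream can starve the other. */
    for (;;) {
        Rec vr, ar;
        peek(video, &vr);
        peek(audio, &ar);
        if (!vr.frame && !ar.frame)
            break;
        if (ar.frame && (!vr.frame || ar.ts <= vr.ts)) {
            printf("audio, ts=%" PRId64 "\n", ar.ts);  /* t=1, then t=3 */
            av_fifo_drain(audio, sizeof(Rec));
        } else {
            printf("video, ts=%" PRId64 "\n", vr.ts);  /* t=2 */
            av_fifo_drain(video, sizeof(Rec));
        }
    }

    av_fifo_freep(&video);
    av_fifo_freep(&audio);
    return 0;
}
```

Run against the arrivals above, this prints audio t=1, video t=2, audio t=3: exactly the arrival order, whereas the old code would have kept servicing video as long as a video frame was pending.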