@@ -31,13 +31,17 @@
#include "libavutil/pixdesc.h"
#include "libavutil/opt.h"
#include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
#include "libavformat/internal.h"
#include "libavutil/internal.h"
#include "libavutil/parseutils.h"
#include "libavutil/time.h"
#include "libavutil/imgutils.h"
+#include "libavutil/fifo.h"
#include "avdevice.h"
+#define FIFO_SIZE 4
+
static const int avf_time_base = 1000000;
static const AVRational avf_time_base_q = {
@@ -128,8 +132,8 @@ typedef struct
AVCaptureSession *capture_session;
AVCaptureVideoDataOutput *video_output;
AVCaptureAudioDataOutput *audio_output;
- CMSampleBufferRef current_frame;
- CMSampleBufferRef current_audio_frame;
+ AVFifoBuffer *video_fifo;
+ AVFifoBuffer *audio_fifo;
AVCaptureDevice *observed_device;
#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
@@ -138,6 +142,11 @@ typedef struct
int observed_quit;
} AVFContext;
+typedef struct { /* one queued capture sample: the element type stored in video_fifo / audio_fifo */
+ int64_t ts; /* arrival time taken from av_gettime_relative() in the capture callbacks; used to order audio vs. video */
+ CMSampleBufferRef frame; /* sample buffer retained by fifo_write(); fifo_peek() sets it to nil when the FIFO is empty */
+} BufferRef;
+
static void lock_frames(AVFContext* ctx)
{
pthread_mutex_lock(&ctx->frame_lock);
@@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
pthread_mutex_unlock(&ctx->frame_lock);
}
+/**
+ * Append one sample buffer to the FIFO together with its arrival timestamp.
+ *
+ * The frame is retained here; whoever removes the entry is responsible for
+ * the matching CFRelease(). The function does not check for free space, so
+ * the caller must make room first.
+ *
+ * @param f     FIFO holding BufferRef elements
+ * @param ts    arrival timestamp stored alongside the frame
+ * @param frame sample buffer to enqueue
+ */
+static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef frame)
+{
+    BufferRef entry;
+
+    entry.ts    = ts;
+    entry.frame = (CMSampleBufferRef)CFRetain(frame);
+    av_fifo_generic_write(f, &entry, sizeof(entry), NULL);
+}
+
+/**
+ * Copy the oldest FIFO entry into *buf without removing it.
+ *
+ * When the FIFO is empty only buf->frame is written (set to nil); buf->ts is
+ * left untouched, so callers must test buf->frame before reading buf->ts.
+ *
+ * @param f   FIFO holding BufferRef elements
+ * @param buf destination for the oldest entry
+ */
+static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
+{
+    if (!av_fifo_size(f)) {
+        buf->frame = nil;
+        return;
+    }
+
+    av_fifo_generic_peek(f, buf, sizeof(*buf), NULL);
+}
+
+/**
+ * Remove the oldest entry from the FIFO.
+ *
+ * The FIFO must contain at least one entry (checked with av_assert2()).
+ *
+ * @param f       FIFO holding BufferRef elements
+ * @param release non-zero: also CFRelease() the frame of the removed entry;
+ *                zero: ownership of the frame stays with the caller, who has
+ *                already obtained it through fifo_peek()
+ */
+static inline void fifo_drain(AVFifoBuffer* f, int release)
+{
+    BufferRef head;
+
+    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
+
+    if (release) {
+        fifo_peek(f, &head);
+        CFRelease(head.frame);
+    }
+
+    av_fifo_drain(f, sizeof(head));
+}
+
+/**
+ * Release every sample buffer still queued in *f, then free the FIFO itself
+ * through av_fifo_freep().
+ *
+ * Does nothing when f or *f is NULL. The FIFO allocation result is not
+ * checked where it is created and this function is called unconditionally
+ * from destroy_context(), so *f may legitimately be NULL here (allocation
+ * failed, or the context is torn down more than once); av_fifo_size() must
+ * not be handed a NULL FIFO.
+ *
+ * @param f address of the FIFO pointer to drain and free
+ */
+static inline void fifo_freep(AVFifoBuffer **f)
+{
+    if (!f || !*f)
+        return;
+
+    /* drop the references taken by fifo_write() for all pending entries */
+    while (av_fifo_size(*f))
+        fifo_drain(*f, 1);
+
+    av_fifo_freep(f);
+}
+
/** FrameReciever class - delegate for AVCaptureSession
*/
@interface AVFFrameReceiver : NSObject
@@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
fromConnection:(AVCaptureConnection *)connection
{
+ AVFifoBuffer *fifo = _context->video_fifo;
+ int64_t ts = av_gettime_relative();
lock_frames(_context);
- if (_context->current_frame != nil) {
- CFRelease(_context->current_frame);
+ if (av_fifo_space(fifo) == 0) {
+ av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n");
+ fifo_drain(fifo, 1);
}
- _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
+ fifo_write(fifo, ts, videoFrame);
unlock_frames(_context);
@@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
fromConnection:(AVCaptureConnection *)connection
{
+ AVFifoBuffer *fifo = _context->audio_fifo;
+ int64_t ts = av_gettime_relative();
lock_frames(_context);
- if (_context->current_audio_frame != nil) {
- CFRelease(_context->current_audio_frame);
+ if (!av_fifo_space(fifo)) {
+ av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n");
+ fifo_drain(fifo, 1);
}
- _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
+ fifo_write(fifo, ts, audioFrame);
unlock_frames(_context);
@@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
ctx->avf_audio_delegate = NULL;
av_freep(&ctx->audio_buffer);
+ fifo_freep(&ctx->video_fifo);
+ fifo_freep(&ctx->audio_fifo);
pthread_mutex_destroy(&ctx->frame_lock);
-
- if (ctx->current_frame) {
- CFRelease(ctx->current_frame);
- }
}
static void parse_device_name(AVFormatContext *s)
@@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
static int get_video_config(AVFormatContext *s)
{
AVFContext *ctx = (AVFContext*)s->priv_data;
+ BufferRef buf;
CVImageBufferRef image_buffer;
CMBlockBufferRef block_buffer;
CGSize image_buffer_size;
@@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)
avpriv_set_pts_info(stream, 64, 1, avf_time_base);
- image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
- block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+ fifo_peek(ctx->video_fifo, &buf);
+ if (buf.frame == nil) {
+ return 1;
+ }
+
+ image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
+ block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
if (image_buffer) {
image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
@@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
stream->codecpar->format = ctx->pixel_format;
}
- CFRelease(ctx->current_frame);
- ctx->current_frame = nil;
-
unlock_frames(ctx);
return 0;
@@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s)
static int get_audio_config(AVFormatContext *s)
{
AVFContext *ctx = (AVFContext*)s->priv_data;
+ BufferRef buf;
CMFormatDescriptionRef format_desc;
AVStream* stream = avformat_new_stream(s, NULL);
@@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)
avpriv_set_pts_info(stream, 64, 1, avf_time_base);
- format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
+ fifo_peek(ctx->audio_fifo, &buf);
+ if (buf.frame == nil) {
+ return 1;
+ }
+
+ format_desc = CMSampleBufferGetFormatDescription(buf.frame);
const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
if (!basic_desc) {
@@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
}
if (ctx->audio_non_interleaved) {
- CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+ CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer);
ctx->audio_buffer = av_malloc(ctx->audio_buffer_size);
if (!ctx->audio_buffer) {
@@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
}
}
- CFRelease(ctx->current_audio_frame);
- ctx->current_audio_frame = nil;
-
unlock_frames(ctx);
return 0;
@@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)
pthread_mutex_init(&ctx->frame_lock, NULL);
+ ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+ ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+
#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
CGGetActiveDisplayList(0, NULL, &num_screens);
#endif
@@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
AVFContext* ctx = (AVFContext*)s->priv_data;
do {
+ BufferRef video;
+ BufferRef audio;
CVImageBufferRef image_buffer;
CMBlockBufferRef block_buffer;
lock_frames(ctx);
- if (ctx->current_frame != nil) {
+ fifo_peek(ctx->video_fifo, &video);
+ fifo_peek(ctx->audio_fifo, &audio);
+
+ if (video.frame != nil && audio.frame != nil) {
+ // process oldest CMSampleBufferRef first
+ if (audio.ts <= video.ts) {
+ video.frame = nil;
+ } else {
+ audio.frame = nil;
+ }
+ }
+
+ if (video.frame != nil) {
int status;
int length = 0;
- image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
- block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+ fifo_drain(ctx->video_fifo, 0);
+ unlock_frames(ctx);
+
+ image_buffer = CMSampleBufferGetImageBuffer(video.frame);
+ block_buffer = CMSampleBufferGetDataBuffer(video.frame);
if (image_buffer != nil) {
length = (int)CVPixelBufferGetDataSize(image_buffer);
} else if (block_buffer != nil) {
length = (int)CMBlockBufferGetDataLength(block_buffer);
} else {
+ CFRelease(video.frame);
return AVERROR(EINVAL);
}
if (av_new_packet(pkt, length) < 0) {
+ CFRelease(video.frame);
return AVERROR(EIO);
}
CMItemCount count;
CMSampleTimingInfo timing_info;
- if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
+ if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info, &count) == noErr) {
AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
}
@@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
status = AVERROR(EIO);
}
}
- CFRelease(ctx->current_frame);
- ctx->current_frame = nil;
+ CFRelease(video.frame);
- if (status < 0)
+ if (status < 0) {
return status;
- } else if (ctx->current_audio_frame != nil) {
- CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+ }
+ } else if (audio.frame != nil) {
+ CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(audio.frame);
int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
+ fifo_drain(ctx->audio_fifo, 0);
+ unlock_frames(ctx);
+
if (!block_buffer || !block_buffer_size) {
+ CFRelease(audio.frame);
return AVERROR(EIO);
}
if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+ CFRelease(audio.frame);
return AVERROR_BUFFER_TOO_SMALL;
}
if (av_new_packet(pkt, block_buffer_size) < 0) {
+ CFRelease(audio.frame);
return AVERROR(EIO);
}
CMItemCount count;
CMSampleTimingInfo timing_info;
- if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
+ if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info, &count) == noErr) {
AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
}
@@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
if (ret != kCMBlockBufferNoErr) {
+ CFRelease(audio.frame);
return AVERROR(EIO);
}
@@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
} else {
OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
if (ret != kCMBlockBufferNoErr) {
+ CFRelease(audio.frame);
return AVERROR(EIO);
}
}
- CFRelease(ctx->current_audio_frame);
- ctx->current_audio_frame = nil;
+ CFRelease(audio.frame);
} else {
pkt->data = NULL;
unlock_frames(ctx);
@@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
return AVERROR(EAGAIN);
}
}
-
- unlock_frames(ctx);
} while (!pkt->data);
return 0;
From: Mark Reid <mindmark@gmail.com> This fixes audio issues I've had with some capture devices. The audio gets really choppy and stops working. This seems to be because avf_read_packet stops outputting the audio frames because a video frame happens to be available first. It is based on the approach used in a patch from #4437 https://trac.ffmpeg.org/ticket/4437 My approach uses an AVFifoBuffer instead of NSMutableArray and also outputs the packets in the same order they arrive from AVFoundation. Should fix ticket #4437 and #4513 --- libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++--------- 1 file changed, 124 insertions(+), 36 deletions(-)