From patchwork Sun Feb 14 06:04:42 2021
X-Patchwork-Submitter: Mark Reid
X-Patchwork-Id: 25610
From: mindmark@gmail.com
To: ffmpeg-devel@ffmpeg.org
Cc: Mark Reid
Date: Sat, 13 Feb 2021 22:04:42 -0800
Message-Id: <20210214060442.20157-1-mindmark@gmail.com>
X-Mailer: git-send-email 2.29.2
Subject: [FFmpeg-devel] [PATCH] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive

From: Mark Reid

Hi,

This patch fixes audio issues I've had with some capture devices: the audio gets really choppy and eventually stops working. This seems to happen because avf_read_packet() stops outputting audio frames whenever a video frame happens to be available first.

It is based on the approach used in a patch from ticket #4437 (https://trac.ffmpeg.org/ticket/4437). My approach uses an AVFifoBuffer instead of an NSMutableArray, and also outputs the packets in the same order they arrive from AVFoundation.

Should fix tickets #4437 and #4513.
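To make the intent clearer, here is a small standalone sketch (not part of the patch) of the selection rule the reworked avf_read_packet() follows: peek at both queues and emit whichever sample arrived first, preferring audio on a tie. The names below (BufferRefModel, pick_video_first) are illustrative only; the actual patch stores retained CMSampleBufferRef handles in an AVFifoBuffer.

/* Simplified model of the packet-selection logic in the patch below:
 * emit the frame with the oldest arrival timestamp first. */
#include <stdint.h>
#include <stdio.h>

typedef struct {
    int64_t ts;     /* arrival time, av_gettime_relative() in the patch */
    int     valid;  /* stands in for "frame != nil" */
} BufferRefModel;

/* Return 1 if the video entry should be emitted next, 0 for audio. */
static int pick_video_first(BufferRefModel video, BufferRefModel audio)
{
    if (video.valid && audio.valid)
        return video.ts < audio.ts; /* oldest first; ties go to audio, as in the patch */
    return video.valid;             /* otherwise take video only if it is the one present */
}

int main(void)
{
    BufferRefModel video = { .ts = 200, .valid = 1 };
    BufferRefModel audio = { .ts = 100, .valid = 1 };
    /* audio arrived earlier, so it is emitted first */
    printf("emit %s first\n", pick_video_first(video, audio) ? "video" : "audio");
    return 0;
}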
---
 libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
 1 file changed, 124 insertions(+), 36 deletions(-)

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 59d5b0af4f..5ac6ec4183 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -31,13 +31,17 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
 #include "libavformat/internal.h"
 #include "libavutil/internal.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/fifo.h"
 #include "avdevice.h"
 
+#define FIFO_SIZE 4
+
 static const int avf_time_base = 1000000;
 
 static const AVRational avf_time_base_q = {
@@ -128,8 +132,8 @@ typedef struct
     AVCaptureSession         *capture_session;
     AVCaptureVideoDataOutput *video_output;
     AVCaptureAudioDataOutput *audio_output;
-    CMSampleBufferRef         current_frame;
-    CMSampleBufferRef         current_audio_frame;
+    AVFifoBuffer             *video_fifo;
+    AVFifoBuffer             *audio_fifo;
 
     AVCaptureDevice          *observed_device;
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
@@ -138,6 +142,11 @@ typedef struct
     int observed_quit;
 } AVFContext;
 
+typedef struct {
+    int64_t           ts;
+    CMSampleBufferRef frame;
+} BufferRef;
+
 static void lock_frames(AVFContext* ctx)
 {
     pthread_mutex_lock(&ctx->frame_lock);
@@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
     pthread_mutex_unlock(&ctx->frame_lock);
 }
 
+static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef frame)
+{
+    BufferRef buf = {
+        .ts = ts,
+        .frame = frame,
+    };
+
+    CFRetain(frame);
+    av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
+}
+
+static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
+{
+    if (av_fifo_size(f)) {
+        av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
+        return;
+    }
+    buf->frame = nil;
+    return;
+}
+
+static inline void fifo_drain(AVFifoBuffer* f, int release)
+{
+    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
+    if (release) {
+        BufferRef buf;
+        fifo_peek(f, &buf);
+        CFRelease(buf.frame);
+    }
+    av_fifo_drain(f, sizeof(BufferRef));
+}
+
+static inline void fifo_freep(AVFifoBuffer **f)
+{
+    if (f) {
+        while (av_fifo_size(*f)) {
+            fifo_drain(*f, 1);
+        }
+        av_fifo_freep(f);
+    }
+}
+
 /** FrameReciever class - delegate for AVCaptureSession
  */
 @interface AVFFrameReceiver : NSObject
@@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->video_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);
 
-    if (_context->current_frame != nil) {
-        CFRelease(_context->current_frame);
+    if (av_fifo_space(fifo) == 0) {
+        av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }
 
-    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
+    fifo_write(fifo, ts, videoFrame);
 
     unlock_frames(_context);
 
@@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->audio_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);
 
-    if (_context->current_audio_frame != nil) {
-        CFRelease(_context->current_audio_frame);
+    if (!av_fifo_space(fifo)) {
+        av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }
 
-    _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
+    fifo_write(fifo, ts, audioFrame);
 
     unlock_frames(_context);
 
@@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
     ctx->avf_audio_delegate = NULL;
 
     av_freep(&ctx->audio_buffer);
+    fifo_freep(&ctx->video_fifo);
+    fifo_freep(&ctx->audio_fifo);
 
     pthread_mutex_destroy(&ctx->frame_lock);
-
-    if (ctx->current_frame) {
-        CFRelease(ctx->current_frame);
-    }
 }
 
 static void parse_device_name(AVFormatContext *s)
@@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
 static int get_video_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CVImageBufferRef image_buffer;
     CMBlockBufferRef block_buffer;
     CGSize image_buffer_size;
@@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)
 
     avpriv_set_pts_info(stream, 64, 1, avf_time_base);
 
-    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-    block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+    fifo_peek(ctx->video_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
+    block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
 
     if (image_buffer) {
         image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
@@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
         stream->codecpar->format = ctx->pixel_format;
     }
 
-    CFRelease(ctx->current_frame);
-    ctx->current_frame = nil;
-
     unlock_frames(ctx);
 
     return 0;
@@ -672,6 +730,7 @@ static int get_audio_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CMFormatDescriptionRef format_desc;
     AVStream* stream = avformat_new_stream(s, NULL);
 
@@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)
 
     avpriv_set_pts_info(stream, 64, 1, avf_time_base);
 
-    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
+    fifo_peek(ctx->audio_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    format_desc = CMSampleBufferGetFormatDescription(buf.frame);
     const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
 
     if (!basic_desc) {
@@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
     }
 
     if (ctx->audio_non_interleaved) {
-        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
         ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer);
         ctx->audio_buffer = av_malloc(ctx->audio_buffer_size);
         if (!ctx->audio_buffer) {
@@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
         }
     }
 
-    CFRelease(ctx->current_audio_frame);
-    ctx->current_audio_frame = nil;
-
     unlock_frames(ctx);
 
     return 0;
@@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)
 
     pthread_mutex_init(&ctx->frame_lock, NULL);
 
+    ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+    ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
     CGGetActiveDisplayList(0, NULL, &num_screens);
 #endif
@@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
     AVFContext* ctx = (AVFContext*)s->priv_data;
 
     do {
+        BufferRef video;
+        BufferRef audio;
         CVImageBufferRef image_buffer;
         CMBlockBufferRef block_buffer;
         lock_frames(ctx);
 
-        if (ctx->current_frame != nil) {
+        fifo_peek(ctx->video_fifo, &video);
+        fifo_peek(ctx->audio_fifo, &audio);
+
+        if (video.frame != nil && audio.frame != nil) {
+            // process oldest CMSampleBufferRef first
+            if (audio.ts <= video.ts) {
+                video.frame = nil;
+            } else {
+                audio.frame = nil;
+            }
+        }
+
+        if (video.frame != nil) {
             int status;
             int length = 0;
 
-            image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-            block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+            fifo_drain(ctx->video_fifo, 0);
+            unlock_frames(ctx);
+
+            image_buffer = CMSampleBufferGetImageBuffer(video.frame);
+            block_buffer = CMSampleBufferGetDataBuffer(video.frame);
 
             if (image_buffer != nil) {
                 length = (int)CVPixelBufferGetDataSize(image_buffer);
             } else if (block_buffer != nil) {
                 length = (int)CMBlockBufferGetDataLength(block_buffer);
            } else {
+                CFRelease(video.frame);
                 return AVERROR(EINVAL);
             }
 
             if (av_new_packet(pkt, length) < 0) {
+                CFRelease(video.frame);
                 return AVERROR(EIO);
             }
 
             CMItemCount count;
             CMSampleTimingInfo timing_info;
 
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                     status = AVERROR(EIO);
                 }
             }
-            CFRelease(ctx->current_frame);
-            ctx->current_frame = nil;
+            CFRelease(video.frame);
 
-            if (status < 0)
+            if (status < 0) {
                 return status;
-        } else if (ctx->current_audio_frame != nil) {
-            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+            }
+        } else if (audio.frame != nil) {
+            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(audio.frame);
             int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
 
+            fifo_drain(ctx->audio_fifo, 0);
+            unlock_frames(ctx);
+
             if (!block_buffer || !block_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
 
             if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR_BUFFER_TOO_SMALL;
             }
 
             if (av_new_packet(pkt, block_buffer_size) < 0) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
 
             CMItemCount count;
             CMSampleTimingInfo timing_info;
 
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
 
             OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
             if (ret != kCMBlockBufferNoErr) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
 
@@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
             } else {
                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(audio.frame);
                     return AVERROR(EIO);
                 }
             }
 
-            CFRelease(ctx->current_audio_frame);
-            ctx->current_audio_frame = nil;
+            CFRelease(audio.frame);
         } else {
             pkt->data = NULL;
             unlock_frames(ctx);
@@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                 return AVERROR(EAGAIN);
             }
         }
-
-        unlock_frames(ctx);
     } while (!pkt->data);
 
     return 0;