From patchwork Mon Apr 12 02:31:41 2021
X-Patchwork-Submitter: Mark Reid
X-Patchwork-Id: 26872
From: mindmark@gmail.com
To: ffmpeg-devel@ffmpeg.org
Cc: Mark Reid
Date: Sun, 11 Apr 2021 19:31:41 -0700
Message-Id: <20210412023142.8470-1-mindmark@gmail.com>
X-Mailer: git-send-email 2.29.2
Subject: [FFmpeg-devel] [PATCH v2 1/2] libavdevice/avfoundation: add buffer fifo and output packets in order they arrive

From: Mark Reid

This fixes audio issues I've had with some capture devices, where the audio
gets really choppy and eventually stops working. This seems to happen because
avf_read_packet stops outputting audio frames whenever a video frame happens
to be available first.

It is based on the approach used in a patch from ticket #4437
(https://trac.ffmpeg.org/ticket/4437). My approach uses an AVFifoBuffer
instead of an NSMutableArray and also outputs the packets in the same order
they arrive from AVFoundation.

Should fix tickets #4437 and #4513.
---
 libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
 1 file changed, 124 insertions(+), 36 deletions(-)

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 59d5b0af4f..5ac6ec4183 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -31,13 +31,17 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
 #include "libavformat/internal.h"
 #include "libavutil/internal.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/time.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/fifo.h"
 #include "avdevice.h"

+#define FIFO_SIZE 4
+
 static const int avf_time_base = 1000000;

 static const AVRational avf_time_base_q = {
@@ -128,8 +132,8 @@ typedef struct
     AVCaptureSession         *capture_session;
     AVCaptureVideoDataOutput *video_output;
     AVCaptureAudioDataOutput *audio_output;
-    CMSampleBufferRef         current_frame;
-    CMSampleBufferRef         current_audio_frame;
+    AVFifoBuffer             *video_fifo;
+    AVFifoBuffer             *audio_fifo;

     AVCaptureDevice          *observed_device;
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
@@ -138,6 +142,11 @@ typedef struct
     int observed_quit;
 } AVFContext;

+typedef struct {
+    int64_t           ts;
+    CMSampleBufferRef frame;
+} BufferRef;
+
 static void lock_frames(AVFContext* ctx)
 {
     pthread_mutex_lock(&ctx->frame_lock);
@@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
     pthread_mutex_unlock(&ctx->frame_lock);
 }

+static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef frame)
+{
+    BufferRef buf = {
+        .ts    = ts,
+        .frame = frame,
+    };
+
+    CFRetain(frame);
+    av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
+}
+
+static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
+{
+    if (av_fifo_size(f)) {
+        av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
+        return;
+    }
+    buf->frame = nil;
+    return;
+}
+
+static inline void fifo_drain(AVFifoBuffer* f, int release)
+{
+    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
+    if (release) {
+        BufferRef buf;
+        fifo_peek(f, &buf);
+        CFRelease(buf.frame);
+    }
+    av_fifo_drain(f, sizeof(BufferRef));
+}
+
+static inline void fifo_freep(AVFifoBuffer **f)
+{
+    if (f) {
+        while (av_fifo_size(*f)) {
+            fifo_drain(*f, 1);
+        }
+        av_fifo_freep(f);
+    }
+}
+
 /** FrameReciever class - delegate for AVCaptureSession
  */
 @interface AVFFrameReceiver : NSObject
@@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->video_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);

-    if (_context->current_frame != nil) {
-        CFRelease(_context->current_frame);
+    if (av_fifo_space(fifo) == 0) {
+        av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }

-    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
+    fifo_write(fifo, ts, videoFrame);

     unlock_frames(_context);

@@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
          fromConnection:(AVCaptureConnection *)connection
 {
+    AVFifoBuffer *fifo = _context->audio_fifo;
+    int64_t ts = av_gettime_relative();
     lock_frames(_context);

-    if (_context->current_audio_frame != nil) {
-        CFRelease(_context->current_audio_frame);
+    if (!av_fifo_space(fifo)) {
+        av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n");
+        fifo_drain(fifo, 1);
     }

-    _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
+    fifo_write(fifo, ts, audioFrame);

     unlock_frames(_context);

@@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
     ctx->avf_audio_delegate = NULL;

     av_freep(&ctx->audio_buffer);
+    fifo_freep(&ctx->video_fifo);
+    fifo_freep(&ctx->audio_fifo);

     pthread_mutex_destroy(&ctx->frame_lock);
-
-    if (ctx->current_frame) {
-        CFRelease(ctx->current_frame);
-    }
 }

 static void parse_device_name(AVFormatContext *s)
@@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
 static int get_video_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CVImageBufferRef image_buffer;
     CMBlockBufferRef block_buffer;
     CGSize image_buffer_size;
@@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)

     avpriv_set_pts_info(stream, 64, 1, avf_time_base);

-    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-    block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+    fifo_peek(ctx->video_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
+    block_buffer = CMSampleBufferGetDataBuffer(buf.frame);

     if (image_buffer) {
         image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
@@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
         stream->codecpar->format     = ctx->pixel_format;
     }

-    CFRelease(ctx->current_frame);
-    ctx->current_frame = nil;
-
     unlock_frames(ctx);

     return 0;
@@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s)
 static int get_audio_config(AVFormatContext *s)
 {
     AVFContext *ctx = (AVFContext*)s->priv_data;
+    BufferRef buf;
     CMFormatDescriptionRef format_desc;
     AVStream* stream = avformat_new_stream(s, NULL);

@@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)

     avpriv_set_pts_info(stream, 64, 1, avf_time_base);

-    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
+    fifo_peek(ctx->audio_fifo, &buf);
+    if (buf.frame == nil) {
+        return 1;
+    }
+
+    format_desc = CMSampleBufferGetFormatDescription(buf.frame);
     const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);

     if (!basic_desc) {
@@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
     }

     if (ctx->audio_non_interleaved) {
-        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
         ctx->audio_buffer_size        = CMBlockBufferGetDataLength(block_buffer);
         ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
         if (!ctx->audio_buffer) {
@@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
         }
     }

-    CFRelease(ctx->current_audio_frame);
-    ctx->current_audio_frame = nil;
-
     unlock_frames(ctx);

     return 0;
@@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)

     pthread_mutex_init(&ctx->frame_lock, NULL);

+    ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+    ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
+
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
     CGGetActiveDisplayList(0, NULL, &num_screens);
 #endif
@@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
     AVFContext* ctx = (AVFContext*)s->priv_data;

     do {
+        BufferRef video;
+        BufferRef audio;
         CVImageBufferRef image_buffer;
         CMBlockBufferRef block_buffer;
         lock_frames(ctx);

-        if (ctx->current_frame != nil) {
+        fifo_peek(ctx->video_fifo, &video);
+        fifo_peek(ctx->audio_fifo, &audio);
+
+        if (video.frame != nil && audio.frame != nil) {
+            // process oldest CMSampleBufferRef first
+            if (audio.ts <= video.ts) {
+                video.frame = nil;
+            } else {
+                audio.frame = nil;
+            }
+        }
+
+        if (video.frame != nil) {
             int status;
             int length = 0;

-            image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
-            block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
+            fifo_drain(ctx->video_fifo, 0);
+            unlock_frames(ctx);
+
+            image_buffer = CMSampleBufferGetImageBuffer(video.frame);
+            block_buffer = CMSampleBufferGetDataBuffer(video.frame);

             if (image_buffer != nil) {
                 length = (int)CVPixelBufferGetDataSize(image_buffer);
             } else if (block_buffer != nil) {
                 length = (int)CMBlockBufferGetDataLength(block_buffer);
             } else {
+                CFRelease(video.frame);
                 return AVERROR(EINVAL);
             }

             if (av_new_packet(pkt, length) < 0) {
+                CFRelease(video.frame);
                 return AVERROR(EIO);
             }

             CMItemCount count;
             CMSampleTimingInfo timing_info;

-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                     status = AVERROR(EIO);
                 }
             }
-            CFRelease(ctx->current_frame);
-            ctx->current_frame = nil;
+            CFRelease(video.frame);

-            if (status < 0)
+            if (status < 0) {
                 return status;
-        } else if (ctx->current_audio_frame != nil) {
-            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
+            }
+        } else if (audio.frame != nil) {
+            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(audio.frame);
             int block_buffer_size         = CMBlockBufferGetDataLength(block_buffer);

+            fifo_drain(ctx->audio_fifo, 0);
+            unlock_frames(ctx);
+
             if (!block_buffer || !block_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }

             if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+                CFRelease(audio.frame);
                 return AVERROR_BUFFER_TOO_SMALL;
             }

             if (av_new_packet(pkt, block_buffer_size) < 0) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }

             CMItemCount count;
             CMSampleTimingInfo timing_info;

-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
+            if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info, &count) == noErr) {
                 AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
                 pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
             }
@@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)

                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(audio.frame);
                     return AVERROR(EIO);
                 }

@@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
         } else {
             OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
             if (ret != kCMBlockBufferNoErr) {
+                CFRelease(audio.frame);
                 return AVERROR(EIO);
             }
         }

-            CFRelease(ctx->current_audio_frame);
-            ctx->current_audio_frame = nil;
+            CFRelease(audio.frame);
         } else {
             pkt->data = NULL;
             unlock_frames(ctx);
@@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                 return AVERROR(EAGAIN);
             }
         }
-
-        unlock_frames(ctx);
     } while (!pkt->data);

     return 0;
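
[Not part of the patch] As a review aid, here is a small standalone C sketch of the
ordering rule the reworked avf_read_packet applies: both FIFOs are peeked, and when a
video and an audio sample buffer are both queued, the one with the older arrival
timestamp (recorded with av_gettime_relative() in the capture callbacks) is emitted
first, with ties going to audio. A toy ring buffer stands in for AVFifoBuffer here;
the names Fifo, fifo_peek_ts, fifo_pop and pick_next are illustrative only and do not
appear in the patch.

    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-in for one AVFifoBuffer of BufferRef entries:
     * each entry only keeps its arrival timestamp. */
    typedef struct {
        int64_t ts[4];
        int     head, count;
    } Fifo;

    static int fifo_peek_ts(const Fifo *f, int64_t *ts)
    {
        if (!f->count)
            return 0;
        *ts = f->ts[f->head];
        return 1;
    }

    static void fifo_pop(Fifo *f)
    {
        f->head = (f->head + 1) % 4;
        f->count--;
    }

    /* Mirrors the decision in avf_read_packet: when both streams have a
     * buffered frame, the one that arrived first is emitted; ties go to audio. */
    static const char *pick_next(Fifo *video, Fifo *audio)
    {
        int64_t vts, ats;
        int has_v = fifo_peek_ts(video, &vts);
        int has_a = fifo_peek_ts(audio, &ats);

        if (has_v && has_a) {
            if (ats <= vts) {
                fifo_pop(audio);
                return "audio";
            }
            fifo_pop(video);
            return "video";
        }
        if (has_v) { fifo_pop(video); return "video"; }
        if (has_a) { fifo_pop(audio); return "audio"; }
        return "none (EAGAIN)";
    }

    int main(void)
    {
        /* Video frames arrived at t=10 and t=40, audio frames at t=20 and t=30. */
        Fifo video = { .ts = { 10, 40 }, .count = 2 };
        Fifo audio = { .ts = { 20, 30 }, .count = 2 };

        for (int i = 0; i < 5; i++)
            printf("packet %d: %s\n", i, pick_next(&video, &audio));
        return 0;
    }

Run as-is this prints video, audio, audio, video, none (EAGAIN): the audio buffered at
t=20 is no longer starved just because a video frame is also queued, which is the
behaviour the commit message describes.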