
[FFmpeg-devel,v9,1/3] libavdevice/avfoundation.m: use AudioConverter, extend supported formats

Message ID 03970EF9-6434-417E-B32D-70E37743B16E@rastageeks.org
State New
Series [FFmpeg-devel,v9,1/3] libavdevice/avfoundation.m: use AudioConverter, extend supported formats

Checks

Context Check Description
andriy/configurex86 warning Failed to apply patch
andriy/configureppc warning Failed to apply patch

Commit Message

Romain Beauxis Jan. 6, 2022, 2:24 p.m. UTC
* Implement support for AudioConverter
* Switch to AudioConverter's API to convert unsupported PCM
  formats (non-interleaved, non-packed) to supported formats
* Minimize data copy.

This fixes: https://trac.ffmpeg.org/ticket/9502

API ref:
https://developer.apple.com/documentation/audiotoolbox/audio_converter_services

Signed-off-by: Romain Beauxis <toots@rastageeks.org>
---
This is the first patch in a series of 3 that fixes, cleans up, and enhances the
AVFoundation implementation in libavdevice.

These patches come from an actual user-facing application relying on
libavdevice’s implementation of AVFoundation audio input. Without them,
AVFoundation is practically unusable, as it will:
* Refuse to process certain specific audio input formats that are actually
  returned by the OS for some users (packed PCM audio)
* Drop audio frames, resulting in corrupted audio input. This might have gone
  unnoticed with video frames, but it makes AVFoundation essentially unusable
  for audio.

These patches are now included in our production build, so they are tested
and usable in production.
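
For reference, here is a simplified, self-contained sketch of the conversion
pattern the patch follows. The helper names and error handling below are
illustrative only and are not taken verbatim from the patch:

#include <string.h>
#include <AudioToolbox/AudioToolbox.h>
#include <CoreMedia/CoreMedia.h>

/* Build a packed, interleaved output description that keeps the input's
 * sample rate, channel count and sample type, then create a converter. */
static OSStatus make_packed_converter(const AudioStreamBasicDescription *in,
                                      AudioStreamBasicDescription *out,
                                      AudioConverterRef *conv)
{
    UInt32 bytes_per_sample = in->mBitsPerChannel >> 3;

    memset(out, 0, sizeof(*out));
    out->mSampleRate       = in->mSampleRate;
    out->mFormatID         = kAudioFormatLinearPCM;
    out->mChannelsPerFrame = in->mChannelsPerFrame;
    out->mBitsPerChannel   = in->mBitsPerChannel;
    out->mFramesPerPacket  = 1;
    out->mBytesPerFrame    = in->mChannelsPerFrame * bytes_per_sample;
    out->mBytesPerPacket   = out->mFramesPerPacket * out->mBytesPerFrame;
    /* Keep the float/signed/endianness flags, but force packed data. */
    out->mFormatFlags = (in->mFormatFlags & (kAudioFormatFlagIsFloat |
                                             kAudioFormatFlagIsSignedInteger |
                                             kAudioFormatFlagIsBigEndian)) |
                        kAudioFormatFlagIsPacked;

    return AudioConverterNew(in, out, conv);
}

/* Convert one captured block into 'dst'. The required output size is the
 * trivial product nb_samples * bytes_per_sample * channels, so no API call
 * is needed to compute it. Shown for the single-buffer (interleaved) case. */
static OSStatus convert_block(AudioConverterRef conv, CMBlockBufferRef block,
                              UInt32 channels, UInt32 bytes_per_sample,
                              void *dst, UInt32 dst_size)
{
    size_t length = CMBlockBufferGetDataLength(block);
    UInt32 nb_samples = (UInt32)(length / (channels * bytes_per_sample));
    size_t size;
    OSStatus err;
    AudioBufferList in_list = {
        .mNumberBuffers = 1,
        .mBuffers[0]    = { .mNumberChannels = channels }
    };
    AudioBufferList out_list = {
        .mNumberBuffers = 1,
        .mBuffers[0]    = { .mNumberChannels = channels,
                            .mDataByteSize   = dst_size,
                            .mData           = dst }
    };

    /* Zero-copy access to the captured samples. */
    err = CMBlockBufferGetDataPointer(block, 0, &size, NULL,
                                      (char **)&in_list.mBuffers[0].mData);
    if (err != kCMBlockBufferNoErr)
        return err;
    in_list.mBuffers[0].mDataByteSize = (UInt32)size;

    return AudioConverterConvertComplexBuffer(conv, nb_samples,
                                              &in_list, &out_list);
}

The actual patch additionally builds one AudioBuffer per channel when the
input is non-interleaved, and releases the converter with
AudioConverterDispose() when the context is destroyed; this replaces the old
hand-rolled interleaving code.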

Changelog for this patch:
* v2: None
* v3: None
* v4: None
* v5: Fix indentation/wrapping
* v6: None
* v7: Removed the use of kAudioConverterPropertyCalculateOutputBufferSize
  to calculate the output buffer size. The calculation is trivial, and this
  call was failing randomly for no apparent reason
* v8: None
* v9: None

libavdevice/avfoundation.m | 255 +++++++++++++++++++++----------------
1 file changed, 145 insertions(+), 110 deletions(-)

Comments

Marvin Scholz Jan. 14, 2022, 12:57 p.m. UTC | #1
On 6 Jan 2022, at 15:24, Romain Beauxis wrote:

> * Implement support for AudioConverter
> * Switch to AudioConverter's API to convert unsupported PCM
>   formats (non-interleaved, non-packed) to supported formats
> * Minimize data copy.
>
> This fixes: https://trac.ffmpeg.org/ticket/9502
>
> API ref:
> https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
>
> Signed-off-by: Romain Beauxis <toots@rastageeks.org>
> ---
> This is the first patch of a series of 3 that fix, cleanup and enhance the
> avfoundation implementation for libavdevice.
>
> These patches come from an actual user-facing application relying on
> libavdevice’s implementation of avfoundation audio input. Without them,
> Avfoundation is practically unusable as it will:
> * Refuse to process certain specific audio input format that are actually
> returned by the OS for some users (packed PCM audio)
> * Drop audio frames, resulting in corrupted audio input. This might have been
> unnoticed with video frames but this makes avfoundation essentially unusable
> for audio.
>
> The patches are now being included in our production build so they are tested
> and usable in production.
>

Hi,

the patches are still corrupt and do not apply.
As stated earlier, please either use git send-email or attach the patch
to the mail instead of putting its contents in it, as apparently Mail.app
messes them up.
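
For example, something along these lines (revision number and options shown
only as an illustration, adjust them to your setup):

    git format-patch -v10 -3 HEAD
    git send-email --to=ffmpeg-devel@ffmpeg.org v10-000*.patch

git send-email hands the patch files to the mail transport directly, so the
mail client never gets a chance to re-wrap the diff.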

Thilo Borgmann Jan. 14, 2022, 5:46 p.m. UTC | #2
On 14.01.22 at 13:57, Marvin Scholz wrote:
> 
> 
> On 6 Jan 2022, at 15:24, Romain Beauxis wrote:
> 
>> * Implement support for AudioConverter
>> * Switch to AudioConverter's API to convert unsupported PCM
>>   formats (non-interleaved, non-packed) to supported formats
>> * Minimize data copy.
>>
>> This fixes: https://trac.ffmpeg.org/ticket/9502
>>
>> API ref:
>> https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
>>
>> Signed-off-by: Romain Beauxis <toots@rastageeks.org>
>> ---
>> This is the first patch of a series of 3 that fix, cleanup and enhance the
>> avfoundation implementation for libavdevice.
>>
>> These patches come from an actual user-facing application relying on
>> libavdevice’s implementation of avfoundation audio input. Without them,
>> Avfoundation is practically unusable as it will:
>> * Refuse to process certain specific audio input format that are actually
>> returned by the OS for some users (packed PCM audio)
>> * Drop audio frames, resulting in corrupted audio input. This might have been
>> unnoticed with video frames but this makes avfoundation essentially unusable
>> for audio.
>>
>> The patches are now being included in our production build so they are tested
>> and usable in production.
>>
> 
> Hi,
> 
> the patches are still corrupt and do not apply.
> As stated earlier, please either use git send-email or attach the patch
> to the mail instead of putting its contents in it, as apparently Mail.app
> messes them up.

Still the same for me. Do you use git send-email or git format-patch?

-Thilo
Romain Beauxis Jan. 14, 2022, 5:57 p.m. UTC | #3
On Fri, 14 Jan 2022 at 11:47, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>
> On 14.01.22 at 13:57, Marvin Scholz wrote:
> >
> >
> > On 6 Jan 2022, at 15:24, Romain Beauxis wrote:
> >
> >> * Implement support for AudioConverter
> >> * Switch to AudioConverter's API to convert unsupported PCM
> >>   formats (non-interleaved, non-packed) to supported formats
> >> * Minimize data copy.
> >>
> >> This fixes: https://trac.ffmpeg.org/ticket/9502
> >>
> >> API ref:
> >> https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
> >>
> >> Signed-off-by: Romain Beauxis <toots@rastageeks.org>
> >> ---
> >> This is the first patch of a series of 3 that fix, cleanup and enhance the
> >> avfoundation implementation for libavdevice.
> >>
> >> These patches come from an actual user-facing application relying on
> >> libavdevice’s implementation of avfoundation audio input. Without them,
> >> Avfoundation is practically unusable as it will:
> >> * Refuse to process certain specific audio input format that are actually
> >> returned by the OS for some users (packed PCM audio)
> >> * Drop audio frames, resulting in corrupted audio input. This might have been
> >> unnoticed with video frames but this makes avfoundation essentially unusable
> >> for audio.
> >>
> >> The patches are now being included in our production build so they are tested
> >> and usable in production.
> >>
> >
> > Hi,
> >
> > the patches are still corrupt and do not apply.
> > As stated earlier, please either use git send-email or attach the patch
> > to the mail instead of putting its contents in it, as apparently Mail.app
> > messes them up.
>
> Still the same for me. Do you use git send-email or git format-patch?

Thanks for checking on this y'all and sorry about these complications.

I used git format-patch. I might try git send-email or the GitHub PR
bridge; that seems like a neat trick.

I'm working on a new revision of the patches; I discovered more issues
with audio conversion, possibly linked to bugs in the AudioConverter API.

I also discovered an API to do the conversion internally, without having
to deal with manual reconversion. Hopefully, this also fixes the issues
we uncovered.

I will post once we have done more testing. All in all, macOS sound APIs
seem pretty confusing and buggy around the edges.

Thanks again!
-- Romain
Thilo Borgmann Jan. 14, 2022, 6:10 p.m. UTC | #4
On 14.01.22 at 18:57, Romain Beauxis wrote:
> On Fri, 14 Jan 2022 at 11:47, Thilo Borgmann <thilo.borgmann@mail.de> wrote:
>>
>> On 14.01.22 at 13:57, Marvin Scholz wrote:
>>>
>>>
>>> On 6 Jan 2022, at 15:24, Romain Beauxis wrote:
>>>
>>>> * Implement support for AudioConverter
>>>> * Switch to AudioConverter's API to convert unsupported PCM
>>>>   formats (non-interleaved, non-packed) to supported formats
>>>> * Minimize data copy.
>>>>
>>>> This fixes: https://trac.ffmpeg.org/ticket/9502
>>>>
>>>> API ref:
>>>> https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
>>>>
>>>> Signed-off-by: Romain Beauxis <toots@rastageeks.org>
>>>> ---
>>>> This is the first patch of a series of 3 that fix, cleanup and enhance the
>>>> avfoundation implementation for libavdevice.
>>>>
>>>> These patches come from an actual user-facing application relying on
>>>> libavdevice’s implementation of avfoundation audio input. Without them,
>>>> Avfoundation is practically unusable as it will:
>>>> * Refuse to process certain specific audio input format that are actually
>>>> returned by the OS for some users (packed PCM audio)
>>>> * Drop audio frames, resulting in corrupted audio input. This might have been
>>>> unnoticed with video frames but this makes avfoundation essentially unusable
>>>> for audio.
>>>>
>>>> The patches are now being included in our production build so they are tested
>>>> and usable in production.
>>>>
>>>
>>> Hi,
>>>
>>> the patches are still corrupt and do not apply.
>>> As stated earlier, please either use git send-email or attach the patch
>>> to the mail instead of putting its contents in it, as apparently Mail.app
>>> messes them up.
>>
>> Still the same for me. Do you use git send-email or git format-patch?
> 
> Thanks for checking on this y'all and sorry about these complications.
> 

> I used git format-patch. I might try git send-email or the GitHub PR
> bridge; that seems like a neat trick.

A GitHub PR sounds like something you don't want.

I use format-patch as well and attach the file (not sending via Mail.app, though).



> 
> I'm working on a new revision of the patches, I discovered more issues
> with audio conversion, possibly linked to bugs with the AudioConverter
> API.
> 
> I also discovered an API to do the conversion internally without
> having to deal with manually reconverting. Hopefully, this also fixes
> the issues we uncovered.
> 
> Will post once we have done more testing. All in all, macos sound APIs
> are pretty confusing and buggy around the edges it seems.

Cool.

-Thilo

Patch

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 0cd6e646d5..738cd93375 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -111,16 +111,11 @@ 

    int             num_video_devices;

-    int             audio_channels;
-    int             audio_bits_per_sample;
-    int             audio_float;
-    int             audio_be;
-    int             audio_signed_integer;
-    int             audio_packed;
-    int             audio_non_interleaved;
-
-    int32_t         *audio_buffer;
-    int             audio_buffer_size;
+    UInt32            audio_buffers;
+    UInt32            audio_channels;
+    UInt32            input_bytes_per_sample;
+    UInt32            output_bytes_per_sample;
+    AudioConverterRef audio_converter;

    enum AVPixelFormat pixel_format;

@@ -299,7 +294,10 @@  static void destroy_context(AVFContext* ctx)
    ctx->avf_delegate    = NULL;
    ctx->avf_audio_delegate = NULL;

-    av_freep(&ctx->audio_buffer);
+    if (ctx->audio_converter) {
+      AudioConverterDispose(ctx->audio_converter);
+      ctx->audio_converter = NULL;
+    }

    pthread_mutex_destroy(&ctx->frame_lock);

@@ -673,6 +671,10 @@  static int get_audio_config(AVFormatContext *s)
    AVFContext *ctx = (AVFContext*)s->priv_data;
    CMFormatDescriptionRef format_desc;
    AVStream* stream = avformat_new_stream(s, NULL);
+    AudioStreamBasicDescription output_format = {0};
+    int audio_bits_per_sample, audio_float, audio_be;
+    int audio_signed_integer, audio_packed, audio_non_interleaved;
+    int must_convert = 0;

    if (!stream) {
        return 1;
@@ -690,60 +692,97 @@  static int get_audio_config(AVFormatContext *s)
    avpriv_set_pts_info(stream, 64, 1, avf_time_base);

    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
-    const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
+    const AudioStreamBasicDescription *input_format = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);

-    if (!basic_desc) {
+    if (!input_format) {
        unlock_frames(ctx);
        av_log(s, AV_LOG_ERROR, "audio format not available\n");
        return 1;
    }

+    if (input_format->mFormatID != kAudioFormatLinearPCM) {
+        unlock_frames(ctx);
+        av_log(s, AV_LOG_ERROR, "only PCM audio format are supported at the moment\n");
+        return 1;
+    }
+
    stream->codecpar->codec_type     = AVMEDIA_TYPE_AUDIO;
-    stream->codecpar->sample_rate    = basic_desc->mSampleRate;
-    stream->codecpar->channels       = basic_desc->mChannelsPerFrame;
+    stream->codecpar->sample_rate    = input_format->mSampleRate;
+    stream->codecpar->channels       = input_format->mChannelsPerFrame;
    stream->codecpar->channel_layout = av_get_default_channel_layout(stream->codecpar->channels);

-    ctx->audio_channels        = basic_desc->mChannelsPerFrame;
-    ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
-    ctx->audio_float           = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
-    ctx->audio_be              = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
-    ctx->audio_signed_integer  = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
-    ctx->audio_packed          = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
-    ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
-
-    if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_float &&
-        ctx->audio_bits_per_sample == 32 &&
-        ctx->audio_packed) {
-        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
-    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_signed_integer &&
-        ctx->audio_bits_per_sample == 16 &&
-        ctx->audio_packed) {
-        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
-    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_signed_integer &&
-        ctx->audio_bits_per_sample == 24 &&
-        ctx->audio_packed) {
-        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
-    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_signed_integer &&
-        ctx->audio_bits_per_sample == 32 &&
-        ctx->audio_packed) {
-        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+    audio_bits_per_sample = input_format->mBitsPerChannel;
+    audio_float           = input_format->mFormatFlags & kAudioFormatFlagIsFloat;
+    audio_be              = input_format->mFormatFlags & kAudioFormatFlagIsBigEndian;
+    audio_signed_integer  = input_format->mFormatFlags & kAudioFormatFlagIsSignedInteger;
+    audio_packed          = input_format->mFormatFlags & kAudioFormatFlagIsPacked;
+    audio_non_interleaved = input_format->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
+
+    ctx->input_bytes_per_sample  = input_format->mBitsPerChannel >> 3;
+    ctx->output_bytes_per_sample = ctx->input_bytes_per_sample;
+    ctx->audio_channels          = input_format->mChannelsPerFrame;
+
+    if (audio_non_interleaved) {
+        ctx->audio_buffers = input_format->mChannelsPerFrame;
    } else {
-        unlock_frames(ctx);
-        av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
-        return 1;
+        ctx->audio_buffers = 1;
+    }
+
+    if (audio_non_interleaved || !audio_packed) {
+      must_convert = 1;
+    }
+
+    output_format.mBitsPerChannel   = input_format->mBitsPerChannel;
+    output_format.mChannelsPerFrame = ctx->audio_channels;
+    output_format.mFramesPerPacket  = 1;
+    output_format.mBytesPerFrame    = output_format.mChannelsPerFrame * ctx->input_bytes_per_sample;
+    output_format.mBytesPerPacket   = output_format.mFramesPerPacket * output_format.mBytesPerFrame;
+    output_format.mFormatFlags      = kAudioFormatFlagIsPacked | audio_be;
+    output_format.mFormatID         = kAudioFormatLinearPCM;
+    output_format.mReserved         = 0;
+    output_format.mSampleRate       = input_format->mSampleRate;
+
+    if (audio_float &&
+        audio_bits_per_sample == 32) {
+        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
+        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
+    } else if (audio_float &&
+        audio_bits_per_sample == 64) {
+        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F64BE : AV_CODEC_ID_PCM_F64LE;
+        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
+    } else if (audio_signed_integer &&
+        audio_bits_per_sample == 8) {
+        stream->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
+        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+    } else if (audio_signed_integer &&
+        audio_bits_per_sample == 16) {
+        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
+        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+    } else if (audio_signed_integer &&
+        audio_bits_per_sample == 24) {
+        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
+        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+    } else if (audio_signed_integer &&
+        audio_bits_per_sample == 32) {
+        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+    } else if (audio_signed_integer &&
+        audio_bits_per_sample == 64) {
+        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S64BE : AV_CODEC_ID_PCM_S64LE;
+        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+    } else {
+        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+        ctx->output_bytes_per_sample = 4;
+        output_format.mBitsPerChannel = 32;
+        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
+        must_convert = 1;
    }

-    if (ctx->audio_non_interleaved) {
-        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
-        ctx->audio_buffer_size        = CMBlockBufferGetDataLength(block_buffer);
-        ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
-        if (!ctx->audio_buffer) {
+    if (must_convert) {
+        OSStatus ret = AudioConverterNew(input_format, &output_format, &ctx->audio_converter);
+        if (ret != noErr) {
            unlock_frames(ctx);
-            av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
+            av_log(s, AV_LOG_ERROR, "Error while allocating audio converter\n");
            return 1;
        }
    }
@@ -1048,6 +1087,7 @@  static int copy_cvpixelbuffer(AVFormatContext *s,

static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
{
+    OSStatus ret;
    AVFContext* ctx = (AVFContext*)s->priv_data;

    do {
@@ -1091,7 +1131,7 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                status = copy_cvpixelbuffer(s, image_buffer, pkt);
            } else {
                status = 0;
-                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
+                ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
                if (ret != kCMBlockBufferNoErr) {
                    status = AVERROR(EIO);
                }
@@ -1105,21 +1145,60 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
            }
        } else if (ctx->current_audio_frame != nil) {
            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
-            int block_buffer_size         = CMBlockBufferGetDataLength(block_buffer);

-            if (!block_buffer || !block_buffer_size) {
-                unlock_frames(ctx);
-                return AVERROR(EIO);
-            }
+            size_t input_size = CMBlockBufferGetDataLength(block_buffer);
+            int buffer_size = input_size / ctx->audio_buffers;
+            int nb_samples = input_size / (ctx->audio_channels * ctx->input_bytes_per_sample);
+            int output_size = nb_samples * ctx->output_bytes_per_sample * ctx->audio_channels;

-            if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
-                unlock_frames(ctx);
-                return AVERROR_BUFFER_TOO_SMALL;
+            status = av_new_packet(pkt, output_size);
+            if (status < 0) {
+                CFRelease(audio_frame);
+                return status;
            }

-            if (av_new_packet(pkt, block_buffer_size) < 0) {
-                unlock_frames(ctx);
-                return AVERROR(EIO);
+            if (ctx->audio_converter) {
+                size_t input_buffer_size = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * ctx->audio_buffers);
+                AudioBufferList *input_buffer = av_malloc(input_buffer_size);
+
+                input_buffer->mNumberBuffers = ctx->audio_buffers;
+
+                for (int c = 0; c < ctx->audio_buffers; c++) {
+                    input_buffer->mBuffers[c].mNumberChannels = 1;
+
+                    ret = CMBlockBufferGetDataPointer(block_buffer, c * buffer_size, (size_t *)&input_buffer->mBuffers[c].mDataByteSize, NULL, (void *)&input_buffer->mBuffers[c].mData);
+
+                    if (ret != kCMBlockBufferNoErr) {
+                        av_free(input_buffer);
+                        unlock_frames(ctx);
+                        return AVERROR(EIO);
+                    }
+                }
+
+                AudioBufferList output_buffer = {
+                   .mNumberBuffers = 1,
+                   .mBuffers[0]    = {
+                       .mNumberChannels = ctx->audio_channels,
+                       .mDataByteSize   = pkt->size,
+                       .mData           = pkt->data
+                   }
+                };
+
+                ret = AudioConverterConvertComplexBuffer(ctx->audio_converter, nb_samples, input_buffer, &output_buffer);
+                av_free(input_buffer);
+
+                if (ret != noErr) {
+                    unlock_frames(ctx);
+                    return AVERROR(EIO);
+                }
+
+                pkt->size = output_buffer.mBuffers[0].mDataByteSize;
+            } else {
+                 ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
+                 if (ret != kCMBlockBufferNoErr) {
+                     unlock_frames(ctx);
+                     return AVERROR(EIO);
+                 }
            }

            CMItemCount count;
@@ -1133,54 +1212,10 @@  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
            pkt->stream_index  = ctx->audio_stream_index;
            pkt->flags        |= AV_PKT_FLAG_KEY;

-            if (ctx->audio_non_interleaved) {
-                int sample, c, shift, num_samples;
-
-                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
-                if (ret != kCMBlockBufferNoErr) {
-                    unlock_frames(ctx);
-                    return AVERROR(EIO);
-                }
-
-                num_samples = pkt->size / (ctx->audio_channels * (ctx->audio_bits_per_sample >> 3));
-
-                // transform decoded frame into output format
-                #define INTERLEAVE_OUTPUT(bps)                                         \
-                {                                                                      \
-                    int##bps##_t **src;                                                \
-                    int##bps##_t *dest;                                                \
-                    src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*));      \
-                    if (!src) {                                                        \
-                        unlock_frames(ctx);                                            \
-                        return AVERROR(EIO);                                           \
-                    }                                                                  \
-                                                                                       \
-                    for (c = 0; c < ctx->audio_channels; c++) {                        \
-                        src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
-                    }                                                                  \
-                    dest  = (int##bps##_t*)pkt->data;                                  \
-                    shift = bps - ctx->audio_bits_per_sample;                          \
-                    for (sample = 0; sample < num_samples; sample++)                   \
-                        for (c = 0; c < ctx->audio_channels; c++)                      \
-                            *dest++ = src[c][sample] << shift;                         \
-                    av_freep(&src);                                                    \
-                }
-
-                if (ctx->audio_bits_per_sample <= 16) {
-                    INTERLEAVE_OUTPUT(16)
-                } else {
-                    INTERLEAVE_OUTPUT(32)
-                }
-            } else {
-                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
-                if (ret != kCMBlockBufferNoErr) {
-                    unlock_frames(ctx);
-                    return AVERROR(EIO);
-                }
-            }
-
            CFRelease(ctx->current_audio_frame);
            ctx->current_audio_frame = nil;
+
+            unlock_frames(ctx);
        } else {
            pkt->data = NULL;
            unlock_frames(ctx);