diff mbox

[FFmpeg-devel,v2] lavc/audiotoolboxenc: fix noise in encoded audio

Message ID 20180102150327.4673-1-zhangjiejun1992@gmail.com
State Superseded
Headers show

Commit Message

zhangjiejun1992@gmail.com Jan. 2, 2018, 3:03 p.m. UTC
From: Jiejun Zhang <zhangjiejun1992@gmail.com>

This fixes #6940

Although undocumented, AudioToolbox seems to require the data supplied
by the callback (i.e. ffat_encode_callback) being unchanged until the
next time the callback is called. In the old implementation, the
AVBuffer backing the frame is recycled after the frame is freed, and
somebody else (maybe the decoder) will write into the AVBuffer and
change the data. AudioToolbox then encodes some wrong data and noise
is produced. Copying the data to a separate buffer solves this
problem.
---
 libavcodec/audiotoolboxenc.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

Comments

James Almer Jan. 2, 2018, 3:31 p.m. UTC | #1
On 1/2/2018 12:03 PM, zhangjiejun1992@gmail.com wrote:
> From: Jiejun Zhang <zhangjiejun1992@gmail.com>
> 
> This fixes #6940
> 
> Although undocumented, AudioToolbox seems to require the data supplied
> by the callback (i.e. ffat_encode_callback) being unchanged until the
> next time the callback is called. In the old implementation, the
> AVBuffer backing the frame is recycled after the frame is freed, and
> somebody else (maybe the decoder) will write into the AVBuffer and
> change the data. AudioToolbox then encodes some wrong data and noise
> is produced. Copying the data to a separate buffer solves this
> problem.
> ---
>  libavcodec/audiotoolboxenc.c | 32 +++++++++++++++++++++++++++-----
>  1 file changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
> index 71885d1530..dcac88cdde 100644
> --- a/libavcodec/audiotoolboxenc.c
> +++ b/libavcodec/audiotoolboxenc.c
> @@ -48,6 +48,9 @@ typedef struct ATDecodeContext {
>      AudioFrameQueue afq;
>      int eof;
>      int frame_size;
> +
> +    uint8_t* audio_data_buf;
> +    uint32_t audio_data_buf_size;
>  } ATDecodeContext;
>  
>  static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
> @@ -442,6 +445,9 @@ static av_cold int ffat_init_encoder(AVCodecContext *avctx)
>  
>      ff_af_queue_init(avctx, &at->afq);
>  
> +    at->audio_data_buf_size = 0;
> +    at->audio_data_buf = NULL;

The entire ATDecodeContext struct is zero initialized when allocated, so
this is not needed.

> +
>      return 0;
>  }
>  
> @@ -465,13 +471,27 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>      }
>  
>      frame = ff_bufqueue_get(&at->frame_queue);
> -
> +    int audio_data_size = frame->nb_samples *
> +                          av_get_bytes_per_sample(avctx->sample_fmt) *
> +                          avctx->channels;
> +    if (at->audio_data_buf_size < audio_data_size) {
> +        av_log(avctx, AV_LOG_INFO, "Increasing audio data buffer size to %d\n",

Verbose or debug level, please.

> +               audio_data_size);
> +        av_free(at->audio_data_buf);
> +        at->audio_data_buf_size = audio_data_size;
> +        at->audio_data_buf = av_malloc(at->audio_data_buf_size);
> +        if (!at->audio_data_buf) {
> +            at->audio_data_buf_size = 0;
> +            data->mNumberBuffers = 0;
> +            *nb_packets = 0;
> +            return AVERROR(ENOMEM);
> +        }
> +    }
>      data->mNumberBuffers              = 1;
>      data->mBuffers[0].mNumberChannels = avctx->channels;
> -    data->mBuffers[0].mDataByteSize   = frame->nb_samples *
> -                                        av_get_bytes_per_sample(avctx->sample_fmt) *
> -                                        avctx->channels;
> -    data->mBuffers[0].mData           = frame->data[0];
> +    data->mBuffers[0].mDataByteSize   = audio_data_size;
> +    data->mBuffers[0].mData           = at->audio_data_buf;
> +    memcpy(at->audio_data_buf, frame->data[0], data->mBuffers[0].mDataByteSize);

Can't you instead create a new reference for the frame buffer? Or will
making it non writable break things further into the process? It would
save you a memcpy per frame.

>      if (*nb_packets > frame->nb_samples)
>          *nb_packets = frame->nb_samples;
>  
> @@ -565,6 +585,8 @@ static av_cold int ffat_close_encoder(AVCodecContext *avctx)
>      ff_bufqueue_discard_all(&at->frame_queue);
>      ff_bufqueue_discard_all(&at->used_frame_queue);
>      ff_af_queue_close(&at->afq);
> +    at->audio_data_buf_size = 0;
> +    av_freep(&at->audio_data_buf);
>      return 0;
>  }
>  
>
zhangjiejun1992@gmail.com Jan. 2, 2018, 4:23 p.m. UTC | #2
>
> Can't you instead create a new reference for the frame buffer? Or will
> making it non writable break things further into the process? It would
> save you a memcpy per frame.

Great idea. It works. Making it non-writable should be enough. I'm
submitting v3. Please take a look.
diff mbox

Patch

diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
index 71885d1530..dcac88cdde 100644
--- a/libavcodec/audiotoolboxenc.c
+++ b/libavcodec/audiotoolboxenc.c
@@ -48,6 +48,9 @@  typedef struct ATDecodeContext {
     AudioFrameQueue afq;
     int eof;
     int frame_size;
+
+    uint8_t* audio_data_buf;
+    uint32_t audio_data_buf_size;
 } ATDecodeContext;
 
 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
@@ -442,6 +445,9 @@  static av_cold int ffat_init_encoder(AVCodecContext *avctx)
 
     ff_af_queue_init(avctx, &at->afq);
 
+    at->audio_data_buf_size = 0;
+    at->audio_data_buf = NULL;
+
     return 0;
 }
 
@@ -465,13 +471,27 @@  static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
     }
 
     frame = ff_bufqueue_get(&at->frame_queue);
-
+    int audio_data_size = frame->nb_samples *
+                          av_get_bytes_per_sample(avctx->sample_fmt) *
+                          avctx->channels;
+    if (at->audio_data_buf_size < audio_data_size) {
+        av_log(avctx, AV_LOG_INFO, "Increasing audio data buffer size to %d\n",
+               audio_data_size);
+        av_free(at->audio_data_buf);
+        at->audio_data_buf_size = audio_data_size;
+        at->audio_data_buf = av_malloc(at->audio_data_buf_size);
+        if (!at->audio_data_buf) {
+            at->audio_data_buf_size = 0;
+            data->mNumberBuffers = 0;
+            *nb_packets = 0;
+            return AVERROR(ENOMEM);
+        }
+    }
     data->mNumberBuffers              = 1;
     data->mBuffers[0].mNumberChannels = avctx->channels;
-    data->mBuffers[0].mDataByteSize   = frame->nb_samples *
-                                        av_get_bytes_per_sample(avctx->sample_fmt) *
-                                        avctx->channels;
-    data->mBuffers[0].mData           = frame->data[0];
+    data->mBuffers[0].mDataByteSize   = audio_data_size;
+    data->mBuffers[0].mData           = at->audio_data_buf;
+    memcpy(at->audio_data_buf, frame->data[0], data->mBuffers[0].mDataByteSize);
     if (*nb_packets > frame->nb_samples)
         *nb_packets = frame->nb_samples;
 
@@ -565,6 +585,8 @@  static av_cold int ffat_close_encoder(AVCodecContext *avctx)
     ff_bufqueue_discard_all(&at->frame_queue);
     ff_bufqueue_discard_all(&at->used_frame_queue);
     ff_af_queue_close(&at->afq);
+    at->audio_data_buf_size = 0;
+    av_freep(&at->audio_data_buf);
     return 0;
 }