diff mbox

[FFmpeg-devel,v3] lavc/audiotoolboxenc: fix noise in encoded audio

Message ID 20180102162426.7433-1-zhangjiejun1992@gmail.com
State Superseded
Headers show

Commit Message

zhangjiejun1992@gmail.com Jan. 2, 2018, 4:24 p.m. UTC
From: Jiejun Zhang <zhangjiejun1992@gmail.com>

This fixes #6940

Although undocumented, AudioToolbox seems to require the data supplied
by the callback (i.e. ffat_encode_callback) being unchanged until the
next time the callback is called. In the old implementation, the
AVBuffer backing the frame is recycled after the frame is freed, and
somebody else (maybe the decoder) will write into the AVBuffer and
change the data. AudioToolbox then encodes some wrong data and noise
is produced. Retaining a frame reference solves this problem.
---
 libavcodec/audiotoolboxenc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

Comments

James Almer Jan. 3, 2018, 2:02 a.m. UTC | #1
On 1/2/2018 1:24 PM, zhangjiejun1992@gmail.com wrote:
> From: Jiejun Zhang <zhangjiejun1992@gmail.com>
> 
> This fixes #6940
> 
> Although undocumented, AudioToolbox seems to require the data supplied
> by the callback (i.e. ffat_encode_callback) being unchanged until the
> next time the callback is called. In the old implementation, the
> AVBuffer backing the frame is recycled after the frame is freed, and
> somebody else (maybe the decoder) will write into the AVBuffer and
> change the data. AudioToolbox then encodes some wrong data and noise
> is produced. Retaining a frame reference solves this problem.
> ---
>  libavcodec/audiotoolboxenc.c | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
> index 71885d1530..5d3a746348 100644
> --- a/libavcodec/audiotoolboxenc.c
> +++ b/libavcodec/audiotoolboxenc.c
> @@ -48,6 +48,8 @@ typedef struct ATDecodeContext {
>      AudioFrameQueue afq;
>      int eof;
>      int frame_size;
> +
> +    AVFrame* encoding_frame;
>  } ATDecodeContext;
>  
>  static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
> @@ -442,6 +444,10 @@ static av_cold int ffat_init_encoder(AVCodecContext *avctx)
>  
>      ff_af_queue_init(avctx, &at->afq);
>  
> +    at->encoding_frame = av_frame_alloc();
> +    if (!at->encoding_frame)
> +        return AVERROR(ENOMEM);
> +
>      return 0;
>  }
>  
> @@ -453,6 +459,7 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>      AVCodecContext *avctx = inctx;
>      ATDecodeContext *at = avctx->priv_data;
>      AVFrame *frame;
> +    int ret;
>  
>      if (!at->frame_queue.available) {
>          if (at->eof) {
> @@ -475,6 +482,10 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>      if (*nb_packets > frame->nb_samples)
>          *nb_packets = frame->nb_samples;
>  
> +    av_frame_unref(at->encoding_frame);
> +    if ((ret = av_frame_ref(at->encoding_frame, frame)) < 0)
> +        return ret;

Wouldn't you have to set nb_packets to 0 in case of failure? And for a
non libav* callback function maybe this shouldn't return an AVERROR(),
but just 1 instead.

Also, look at audiotoolboxdec.c, where the reference (packet there
instead of frame) is created right before calling
AudioConverterFillComplexBuffer(), as it can fail. Maybe you can do
something like that instead.

> +
>      ff_bufqueue_add(avctx, &at->used_frame_queue, frame);
>  
>      return 0;
> @@ -565,6 +576,7 @@ static av_cold int ffat_close_encoder(AVCodecContext *avctx)
>      ff_bufqueue_discard_all(&at->frame_queue);
>      ff_bufqueue_discard_all(&at->used_frame_queue);
>      ff_af_queue_close(&at->afq);
> +    av_frame_free(&at->encoding_frame);
>      return 0;
>  }
>  
>
zhangjiejun1992@gmail.com Jan. 3, 2018, 4:02 a.m. UTC | #2
On Wed, Jan 3, 2018 at 10:02 AM, James Almer <jamrial@gmail.com> wrote:
> On 1/2/2018 1:24 PM, zhangjiejun1992@gmail.com wrote:
>> From: Jiejun Zhang <zhangjiejun1992@gmail.com>
>>
>> This fixes #6940
>>
>> Although undocumented, AudioToolbox seems to require the data supplied
>> by the callback (i.e. ffat_encode_callback) being unchanged until the
>> next time the callback is called. In the old implementation, the
>> AVBuffer backing the frame is recycled after the frame is freed, and
>> somebody else (maybe the decoder) will write into the AVBuffer and
>> change the data. AudioToolbox then encodes some wrong data and noise
>> is produced. Retaining a frame reference solves this problem.
>> ---
>>  libavcodec/audiotoolboxenc.c | 12 ++++++++++++
>>  1 file changed, 12 insertions(+)
>>
>> diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
>> index 71885d1530..5d3a746348 100644
>> --- a/libavcodec/audiotoolboxenc.c
>> +++ b/libavcodec/audiotoolboxenc.c
>> @@ -48,6 +48,8 @@ typedef struct ATDecodeContext {
>>      AudioFrameQueue afq;
>>      int eof;
>>      int frame_size;
>> +
>> +    AVFrame* encoding_frame;
>>  } ATDecodeContext;
>>
>>  static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
>> @@ -442,6 +444,10 @@ static av_cold int ffat_init_encoder(AVCodecContext *avctx)
>>
>>      ff_af_queue_init(avctx, &at->afq);
>>
>> +    at->encoding_frame = av_frame_alloc();
>> +    if (!at->encoding_frame)
>> +        return AVERROR(ENOMEM);
>> +
>>      return 0;
>>  }
>>
>> @@ -453,6 +459,7 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>>      AVCodecContext *avctx = inctx;
>>      ATDecodeContext *at = avctx->priv_data;
>>      AVFrame *frame;
>> +    int ret;
>>
>>      if (!at->frame_queue.available) {
>>          if (at->eof) {
>> @@ -475,6 +482,10 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>>      if (*nb_packets > frame->nb_samples)
>>          *nb_packets = frame->nb_samples;
>>
>> +    av_frame_unref(at->encoding_frame);
>> +    if ((ret = av_frame_ref(at->encoding_frame, frame)) < 0)
>> +        return ret;
>
> Wouldn't you have to set nb_packets to 0 in case of failure? And for a
> non libav* callback function maybe this shouldn't return an AVERROR(),
> but just 1 instead.

Yeah, will fix it. For the return value, according to Apple's doc: "If
your callback returns an error, it must return zero packets of data.
Upon receiving zero packets, the AudioConverterFillComplexBuffer
function delivers any pending output, stops producing further output,
and returns the error code.", the return value will be finally
returned to ffat_encode. So I think it's fine to return an AVERROR
here, sounds good?

>
> Also, look at audiotoolboxdec.c, where the reference (packet there
> instead of frame) is created right before calling
> AudioConverterFillComplexBuffer(), as it can fail. Maybe you can do
> something like that instead.
>

This might not be possible since a buffer queue is used while
encoding. There's no way to know which frame is currently being
encoded outside the callback. According to a previous commit the
callback is not always called by AudioConverterFillComplexBuffer.
James Almer Jan. 3, 2018, 4:34 a.m. UTC | #3
On 1/3/2018 1:02 AM, Jiejun Zhang wrote:
> On Wed, Jan 3, 2018 at 10:02 AM, James Almer <jamrial@gmail.com> wrote:
>> On 1/2/2018 1:24 PM, zhangjiejun1992@gmail.com wrote:
>>> From: Jiejun Zhang <zhangjiejun1992@gmail.com>
>>>
>>> This fixes #6940
>>>
>>> Although undocumented, AudioToolbox seems to require the data supplied
>>> by the callback (i.e. ffat_encode_callback) being unchanged until the
>>> next time the callback is called. In the old implementation, the
>>> AVBuffer backing the frame is recycled after the frame is freed, and
>>> somebody else (maybe the decoder) will write into the AVBuffer and
>>> change the data. AudioToolbox then encodes some wrong data and noise
>>> is produced. Retaining a frame reference solves this problem.
>>> ---
>>>  libavcodec/audiotoolboxenc.c | 12 ++++++++++++
>>>  1 file changed, 12 insertions(+)
>>>
>>> diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
>>> index 71885d1530..5d3a746348 100644
>>> --- a/libavcodec/audiotoolboxenc.c
>>> +++ b/libavcodec/audiotoolboxenc.c
>>> @@ -48,6 +48,8 @@ typedef struct ATDecodeContext {
>>>      AudioFrameQueue afq;
>>>      int eof;
>>>      int frame_size;
>>> +
>>> +    AVFrame* encoding_frame;
>>>  } ATDecodeContext;
>>>
>>>  static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
>>> @@ -442,6 +444,10 @@ static av_cold int ffat_init_encoder(AVCodecContext *avctx)
>>>
>>>      ff_af_queue_init(avctx, &at->afq);
>>>
>>> +    at->encoding_frame = av_frame_alloc();
>>> +    if (!at->encoding_frame)
>>> +        return AVERROR(ENOMEM);
>>> +
>>>      return 0;
>>>  }
>>>
>>> @@ -453,6 +459,7 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>>>      AVCodecContext *avctx = inctx;
>>>      ATDecodeContext *at = avctx->priv_data;
>>>      AVFrame *frame;
>>> +    int ret;
>>>
>>>      if (!at->frame_queue.available) {
>>>          if (at->eof) {
>>> @@ -475,6 +482,10 @@ static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
>>>      if (*nb_packets > frame->nb_samples)
>>>          *nb_packets = frame->nb_samples;
>>>
>>> +    av_frame_unref(at->encoding_frame);
>>> +    if ((ret = av_frame_ref(at->encoding_frame, frame)) < 0)
>>> +        return ret;
>>
>> Wouldn't you have to set nb_packets to 0 in case of failure? And for a
>> non libav* callback function maybe this shouldn't return an AVERROR(),
>> but just 1 instead.
> 
> Yeah, will fix it. For the return value, according to Apple's doc: "If
> your callback returns an error, it must return zero packets of data.
> Upon receiving zero packets, the AudioConverterFillComplexBuffer
> function delivers any pending output, stops producing further output,
> and returns the error code.", the return value will be finally
> returned to ffat_encode. So I think it's fine to return an AVERROR
> here, sounds good?

Sure.

> 
>>
>> Also, look at audiotoolboxdec.c, where the reference (packet there
>> instead of frame) is created right before calling
>> AudioConverterFillComplexBuffer(), as it can fail. Maybe you can do
>> something like that instead.
>>
> 
> This might not be possible since a buffer queue is used while
> encoding. There's no way to know which frame is currently being
> encoded outside the callback. According to a previous commit the
> callback is not always called by AudioConverterFillComplexBuffer.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
diff mbox

Patch

diff --git a/libavcodec/audiotoolboxenc.c b/libavcodec/audiotoolboxenc.c
index 71885d1530..5d3a746348 100644
--- a/libavcodec/audiotoolboxenc.c
+++ b/libavcodec/audiotoolboxenc.c
@@ -48,6 +48,8 @@  typedef struct ATDecodeContext {
     AudioFrameQueue afq;
     int eof;
     int frame_size;
+
+    AVFrame* encoding_frame;
 } ATDecodeContext;
 
 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
@@ -442,6 +444,10 @@  static av_cold int ffat_init_encoder(AVCodecContext *avctx)
 
     ff_af_queue_init(avctx, &at->afq);
 
+    at->encoding_frame = av_frame_alloc();
+    if (!at->encoding_frame)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
@@ -453,6 +459,7 @@  static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
     AVCodecContext *avctx = inctx;
     ATDecodeContext *at = avctx->priv_data;
     AVFrame *frame;
+    int ret;
 
     if (!at->frame_queue.available) {
         if (at->eof) {
@@ -475,6 +482,10 @@  static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_pac
     if (*nb_packets > frame->nb_samples)
         *nb_packets = frame->nb_samples;
 
+    av_frame_unref(at->encoding_frame);
+    if ((ret = av_frame_ref(at->encoding_frame, frame)) < 0)
+        return ret;
+
     ff_bufqueue_add(avctx, &at->used_frame_queue, frame);
 
     return 0;
@@ -565,6 +576,7 @@  static av_cold int ffat_close_encoder(AVCodecContext *avctx)
     ff_bufqueue_discard_all(&at->frame_queue);
     ff_bufqueue_discard_all(&at->used_frame_queue);
     ff_af_queue_close(&at->afq);
+    av_frame_free(&at->encoding_frame);
     return 0;
 }