diff mbox series

[FFmpeg-devel] avcodec/binkaudio: add support for >2 channels dct codec

Message ID 20220318130417.47935-1-onemda@gmail.com
State New
Headers show
Series [FFmpeg-devel] avcodec/binkaudio: add support for >2 channels dct codec | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished
andriy/make_aarch64_jetson success Make finished
andriy/make_fate_aarch64_jetson success Make fate finished

Commit Message

Paul B Mahol March 18, 2022, 1:04 p.m. UTC
As presented in .binka files.

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/binkaudio.c | 50 +++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 18 deletions(-)

Comments

Andreas Rheinhardt March 18, 2022, 3:03 p.m. UTC | #1
Paul B Mahol:
> As presented in .binka files.
> 
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/binkaudio.c | 50 +++++++++++++++++++++++++++---------------
>  1 file changed, 32 insertions(+), 18 deletions(-)
> 
> diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
> index b4ff15beeb..54b7e22854 100644
> --- a/libavcodec/binkaudio.c
> +++ b/libavcodec/binkaudio.c
> @@ -51,13 +51,14 @@ typedef struct BinkAudioContext {
>      int version_b;          ///< Bink version 'b'
>      int first;
>      int channels;
> +    int ch_offset;
>      int frame_len;          ///< transform size (samples)
>      int overlap_len;        ///< overlap size (samples)
>      int block_size;
>      int num_bands;
>      float root;
>      unsigned int bands[26];
> -    float previous[MAX_CHANNELS][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs from previous audio block
> +    float previous[6][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs from previous audio block
>      float quant_table[96];
>      AVPacket *pkt;
>      union {
> @@ -74,6 +75,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
>      int sample_rate_half;
>      int i, ret;
>      int frame_len_bits;
> +    int max_channels = avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT ? MAX_CHANNELS : 6;

If you allow up to six channels, then MAX_CHANNELS (i.e. two) needs to
be renamed.

>      int channels = avctx->ch_layout.nb_channels;
>  
>      /* determine frame length */
> @@ -85,7 +87,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
>          frame_len_bits = 11;
>      }
>  
> -    if (channels < 1 || channels > MAX_CHANNELS) {
> +    if (channels < 1 || channels > max_channels) {
>          av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n", channels);
>          return AVERROR_INVALIDDATA;
>      }
> @@ -110,7 +112,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
>  
>      s->frame_len     = 1 << frame_len_bits;
>      s->overlap_len   = s->frame_len / 16;
> -    s->block_size    = (s->frame_len - s->overlap_len) * s->channels;
> +    s->block_size    = (s->frame_len - s->overlap_len) * FFMIN(MAX_CHANNELS, s->channels);
>      sample_rate_half = (sample_rate + 1LL) / 2;
>      if (avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT)
>          s->root = 2.0 / (sqrt(s->frame_len) * 32768.0);
> @@ -166,7 +168,8 @@ static const uint8_t rle_length_tab[16] = {
>   * @param[out] out Output buffer (must contain s->block_size elements)
>   * @return 0 on success, negative error code on failure
>   */
> -static int decode_block(BinkAudioContext *s, float **out, int use_dct)
> +static int decode_block(BinkAudioContext *s, float **out, int use_dct,
> +                        int channels, int ch_offset)
>  {
>      int ch, i, j, k;
>      float q, quant[25];
> @@ -176,8 +179,8 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct)
>      if (use_dct)
>          skip_bits(gb, 2);
>  
> -    for (ch = 0; ch < s->channels; ch++) {
> -        FFTSample *coeffs = out[ch];
> +    for (ch = 0; ch < channels; ch++) {
> +        FFTSample *coeffs = out[ch + ch_offset];
>  
>          if (s->version_b) {
>              if (get_bits_left(gb) < 64)
> @@ -252,17 +255,17 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct)
>              s->trans.rdft.rdft_calc(&s->trans.rdft, coeffs);
>      }
>  
> -    for (ch = 0; ch < s->channels; ch++) {
> +    for (ch = 0; ch < channels; ch++) {
>          int j;
> -        int count = s->overlap_len * s->channels;
> +        int count = s->overlap_len * channels;
>          if (!s->first) {
>              j = ch;
> -            for (i = 0; i < s->overlap_len; i++, j += s->channels)
> -                out[ch][i] = (s->previous[ch][i] * (count - j) +
> -                                      out[ch][i] *          j) / count;
> +            for (i = 0; i < s->overlap_len; i++, j += channels)
> +                out[ch + ch_offset][i] = (s->previous[ch + ch_offset][i] * (count - j) +
> +                                      out[ch + ch_offset][i] *          j) / count;
>          }
> -        memcpy(s->previous[ch], &out[ch][s->frame_len - s->overlap_len],
> -               s->overlap_len * sizeof(*s->previous[ch]));
> +        memcpy(s->previous[ch + ch_offset], &out[ch + ch_offset][s->frame_len - s->overlap_len],
> +               s->overlap_len * sizeof(*s->previous[ch + ch_offset]));
>      }
>  
>      s->first = 0;
> @@ -293,6 +296,7 @@ static int binkaudio_receive_frame(AVCodecContext *avctx, AVFrame *frame)
>      GetBitContext *gb = &s->gb;
>      int ret;
>  
> +again:
>      if (!s->pkt->data) {
>          ret = ff_decode_get_packet(avctx, s->pkt);
>          if (ret < 0)
> @@ -313,22 +317,31 @@ static int binkaudio_receive_frame(AVCodecContext *avctx, AVFrame *frame)
>      }
>  
>      /* get output buffer */
> -    frame->nb_samples = s->frame_len;
> -    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> -        return ret;
> +    if (s->ch_offset == 0) {
> +        frame->nb_samples = s->frame_len;
> +        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> +            return ret;
> +    }
>  
>      if (decode_block(s, (float **)frame->extended_data,
> -                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT)) {
> +                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT,
> +                     FFMIN(MAX_CHANNELS, s->channels), s->ch_offset)) {
>          av_log(avctx, AV_LOG_ERROR, "Incomplete packet\n");
>          return AVERROR_INVALIDDATA;
>      }
> +    s->ch_offset += MAX_CHANNELS;
>      get_bits_align32(gb);
>      if (!get_bits_left(gb)) {
>          memset(gb, 0, sizeof(*gb));
>          av_packet_unref(s->pkt);
>      }
> +    if (s->ch_offset >= s->channels) {
> +        s->ch_offset = 0;
> +    } else {
> +        goto again;
> +    }

Is it really intended that the data for one multi-channel frame is
divided into several input packets?

>  
> -    frame->nb_samples = s->block_size / avctx->ch_layout.nb_channels;
> +    frame->nb_samples = s->block_size / FFMIN(avctx->ch_layout.nb_channels, MAX_CHANNELS);
>  
>      return 0;
>  fail:
> @@ -343,6 +356,7 @@ static void decode_flush(AVCodecContext *avctx)
>      /* s->pkt coincides with avctx->internal->in_pkt
>       * and is unreferenced generically when flushing. */
>      s->first = 1;
> +    s->ch_offset = 0;
>  }
>  
>  const AVCodec ff_binkaudio_rdft_decoder = {
Paul B Mahol March 18, 2022, 3:21 p.m. UTC | #2
On 3/18/22, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> Paul B Mahol:
>> As presented in .binka files.
>>
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>>  libavcodec/binkaudio.c | 50 +++++++++++++++++++++++++++---------------
>>  1 file changed, 32 insertions(+), 18 deletions(-)
>>
>> diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
>> index b4ff15beeb..54b7e22854 100644
>> --- a/libavcodec/binkaudio.c
>> +++ b/libavcodec/binkaudio.c
>> @@ -51,13 +51,14 @@ typedef struct BinkAudioContext {
>>      int version_b;          ///< Bink version 'b'
>>      int first;
>>      int channels;
>> +    int ch_offset;
>>      int frame_len;          ///< transform size (samples)
>>      int overlap_len;        ///< overlap size (samples)
>>      int block_size;
>>      int num_bands;
>>      float root;
>>      unsigned int bands[26];
>> -    float previous[MAX_CHANNELS][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs
>> from previous audio block
>> +    float previous[6][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs from
>> previous audio block
>>      float quant_table[96];
>>      AVPacket *pkt;
>>      union {
>> @@ -74,6 +75,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
>>      int sample_rate_half;
>>      int i, ret;
>>      int frame_len_bits;
>> +    int max_channels = avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT ?
>> MAX_CHANNELS : 6;
>
> If you allow up to six channels, then MAX_CHANNELS (i.e. two) needs to
> be renamed.
>
>>      int channels = avctx->ch_layout.nb_channels;
>>
>>      /* determine frame length */
>> @@ -85,7 +87,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
>>          frame_len_bits = 11;
>>      }
>>
>> -    if (channels < 1 || channels > MAX_CHANNELS) {
>> +    if (channels < 1 || channels > max_channels) {
>>          av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n",
>> channels);
>>          return AVERROR_INVALIDDATA;
>>      }
>> @@ -110,7 +112,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
>>
>>      s->frame_len     = 1 << frame_len_bits;
>>      s->overlap_len   = s->frame_len / 16;
>> -    s->block_size    = (s->frame_len - s->overlap_len) * s->channels;
>> +    s->block_size    = (s->frame_len - s->overlap_len) *
>> FFMIN(MAX_CHANNELS, s->channels);
>>      sample_rate_half = (sample_rate + 1LL) / 2;
>>      if (avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT)
>>          s->root = 2.0 / (sqrt(s->frame_len) * 32768.0);
>> @@ -166,7 +168,8 @@ static const uint8_t rle_length_tab[16] = {
>>   * @param[out] out Output buffer (must contain s->block_size elements)
>>   * @return 0 on success, negative error code on failure
>>   */
>> -static int decode_block(BinkAudioContext *s, float **out, int use_dct)
>> +static int decode_block(BinkAudioContext *s, float **out, int use_dct,
>> +                        int channels, int ch_offset)
>>  {
>>      int ch, i, j, k;
>>      float q, quant[25];
>> @@ -176,8 +179,8 @@ static int decode_block(BinkAudioContext *s, float
>> **out, int use_dct)
>>      if (use_dct)
>>          skip_bits(gb, 2);
>>
>> -    for (ch = 0; ch < s->channels; ch++) {
>> -        FFTSample *coeffs = out[ch];
>> +    for (ch = 0; ch < channels; ch++) {
>> +        FFTSample *coeffs = out[ch + ch_offset];
>>
>>          if (s->version_b) {
>>              if (get_bits_left(gb) < 64)
>> @@ -252,17 +255,17 @@ static int decode_block(BinkAudioContext *s, float
>> **out, int use_dct)
>>              s->trans.rdft.rdft_calc(&s->trans.rdft, coeffs);
>>      }
>>
>> -    for (ch = 0; ch < s->channels; ch++) {
>> +    for (ch = 0; ch < channels; ch++) {
>>          int j;
>> -        int count = s->overlap_len * s->channels;
>> +        int count = s->overlap_len * channels;
>>          if (!s->first) {
>>              j = ch;
>> -            for (i = 0; i < s->overlap_len; i++, j += s->channels)
>> -                out[ch][i] = (s->previous[ch][i] * (count - j) +
>> -                                      out[ch][i] *          j) / count;
>> +            for (i = 0; i < s->overlap_len; i++, j += channels)
>> +                out[ch + ch_offset][i] = (s->previous[ch + ch_offset][i]
>> * (count - j) +
>> +                                      out[ch + ch_offset][i] *
>> j) / count;
>>          }
>> -        memcpy(s->previous[ch], &out[ch][s->frame_len - s->overlap_len],
>> -               s->overlap_len * sizeof(*s->previous[ch]));
>> +        memcpy(s->previous[ch + ch_offset], &out[ch +
>> ch_offset][s->frame_len - s->overlap_len],
>> +               s->overlap_len * sizeof(*s->previous[ch + ch_offset]));
>>      }
>>
>>      s->first = 0;
>> @@ -293,6 +296,7 @@ static int binkaudio_receive_frame(AVCodecContext
>> *avctx, AVFrame *frame)
>>      GetBitContext *gb = &s->gb;
>>      int ret;
>>
>> +again:
>>      if (!s->pkt->data) {
>>          ret = ff_decode_get_packet(avctx, s->pkt);
>>          if (ret < 0)
>> @@ -313,22 +317,31 @@ static int binkaudio_receive_frame(AVCodecContext
>> *avctx, AVFrame *frame)
>>      }
>>
>>      /* get output buffer */
>> -    frame->nb_samples = s->frame_len;
>> -    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>> -        return ret;
>> +    if (s->ch_offset == 0) {
>> +        frame->nb_samples = s->frame_len;
>> +        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>> +            return ret;
>> +    }
>>
>>      if (decode_block(s, (float **)frame->extended_data,
>> -                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT)) {
>> +                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT,
>> +                     FFMIN(MAX_CHANNELS, s->channels), s->ch_offset)) {
>>          av_log(avctx, AV_LOG_ERROR, "Incomplete packet\n");
>>          return AVERROR_INVALIDDATA;
>>      }
>> +    s->ch_offset += MAX_CHANNELS;
>>      get_bits_align32(gb);
>>      if (!get_bits_left(gb)) {
>>          memset(gb, 0, sizeof(*gb));
>>          av_packet_unref(s->pkt);
>>      }
>> +    if (s->ch_offset >= s->channels) {
>> +        s->ch_offset = 0;
>> +    } else {
>> +        goto again;
>> +    }
>
> Is it really intended that the data for one multi-channel frame is
> divided into several input packets?

You are missing the big picture here: files with >2 channels have their
channels interleaved across different packets.
Something like in XMA. (And nothing signals how they are interleaved,
so it's worse than in XMA.) So it is working fine. I just need another
look for possible regressions and security implications. Renaming
MAX_CHANNELS is not useful as that is not a property of both codecs.

>
>>
>> -    frame->nb_samples = s->block_size / avctx->ch_layout.nb_channels;
>> +    frame->nb_samples = s->block_size /
>> FFMIN(avctx->ch_layout.nb_channels, MAX_CHANNELS);
>>
>>      return 0;
>>  fail:
>> @@ -343,6 +356,7 @@ static void decode_flush(AVCodecContext *avctx)
>>      /* s->pkt coincides with avctx->internal->in_pkt
>>       * and is unreferenced generically when flushing. */
>>      s->first = 1;
>> +    s->ch_offset = 0;
>>  }
>>
>>  const AVCodec ff_binkaudio_rdft_decoder = {
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Peter Ross March 20, 2022, 4:37 a.m. UTC | #3
On Fri, Mar 18, 2022 at 04:21:44PM +0100, Paul B Mahol wrote:
> On 3/18/22, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> > Paul B Mahol:
> >> As presented in .binka files.
> >>
> >> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> >> ---
> >>  libavcodec/binkaudio.c | 50 +++++++++++++++++++++++++++---------------
> >>  1 file changed, 32 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
> >> index b4ff15beeb..54b7e22854 100644
> >> --- a/libavcodec/binkaudio.c
> >> +++ b/libavcodec/binkaudio.c
> >> @@ -51,13 +51,14 @@ typedef struct BinkAudioContext {
> >>      int version_b;          ///< Bink version 'b'
> >>      int first;
> >>      int channels;
> >> +    int ch_offset;
> >>      int frame_len;          ///< transform size (samples)
> >>      int overlap_len;        ///< overlap size (samples)
> >>      int block_size;
> >>      int num_bands;
> >>      float root;
> >>      unsigned int bands[26];
> >> -    float previous[MAX_CHANNELS][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs
> >> from previous audio block
> >> +    float previous[6][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs from
> >> previous audio block
> >>      float quant_table[96];
> >>      AVPacket *pkt;
> >>      union {
> >> @@ -74,6 +75,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
> >>      int sample_rate_half;
> >>      int i, ret;
> >>      int frame_len_bits;
> >> +    int max_channels = avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT ?
> >> MAX_CHANNELS : 6;
> >
> > If you allow up to six channels, then MAX_CHANNELS (i.e. two) needs to
> > be renamed.
> >
> >>      int channels = avctx->ch_layout.nb_channels;
> >>
> >>      /* determine frame length */
> >> @@ -85,7 +87,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
> >>          frame_len_bits = 11;
> >>      }
> >>
> >> -    if (channels < 1 || channels > MAX_CHANNELS) {
> >> +    if (channels < 1 || channels > max_channels) {
> >>          av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n",
> >> channels);
> >>          return AVERROR_INVALIDDATA;
> >>      }
> >> @@ -110,7 +112,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
> >>
> >>      s->frame_len     = 1 << frame_len_bits;
> >>      s->overlap_len   = s->frame_len / 16;
> >> -    s->block_size    = (s->frame_len - s->overlap_len) * s->channels;
> >> +    s->block_size    = (s->frame_len - s->overlap_len) *
> >> FFMIN(MAX_CHANNELS, s->channels);
> >>      sample_rate_half = (sample_rate + 1LL) / 2;
> >>      if (avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT)
> >>          s->root = 2.0 / (sqrt(s->frame_len) * 32768.0);
> >> @@ -166,7 +168,8 @@ static const uint8_t rle_length_tab[16] = {
> >>   * @param[out] out Output buffer (must contain s->block_size elements)
> >>   * @return 0 on success, negative error code on failure
> >>   */
> >> -static int decode_block(BinkAudioContext *s, float **out, int use_dct)
> >> +static int decode_block(BinkAudioContext *s, float **out, int use_dct,
> >> +                        int channels, int ch_offset)
> >>  {
> >>      int ch, i, j, k;
> >>      float q, quant[25];
> >> @@ -176,8 +179,8 @@ static int decode_block(BinkAudioContext *s, float
> >> **out, int use_dct)
> >>      if (use_dct)
> >>          skip_bits(gb, 2);
> >>
> >> -    for (ch = 0; ch < s->channels; ch++) {
> >> -        FFTSample *coeffs = out[ch];
> >> +    for (ch = 0; ch < channels; ch++) {
> >> +        FFTSample *coeffs = out[ch + ch_offset];
> >>
> >>          if (s->version_b) {
> >>              if (get_bits_left(gb) < 64)
> >> @@ -252,17 +255,17 @@ static int decode_block(BinkAudioContext *s, float
> >> **out, int use_dct)
> >>              s->trans.rdft.rdft_calc(&s->trans.rdft, coeffs);
> >>      }
> >>
> >> -    for (ch = 0; ch < s->channels; ch++) {
> >> +    for (ch = 0; ch < channels; ch++) {
> >>          int j;
> >> -        int count = s->overlap_len * s->channels;
> >> +        int count = s->overlap_len * channels;
> >>          if (!s->first) {
> >>              j = ch;
> >> -            for (i = 0; i < s->overlap_len; i++, j += s->channels)
> >> -                out[ch][i] = (s->previous[ch][i] * (count - j) +
> >> -                                      out[ch][i] *          j) / count;
> >> +            for (i = 0; i < s->overlap_len; i++, j += channels)
> >> +                out[ch + ch_offset][i] = (s->previous[ch + ch_offset][i]
> >> * (count - j) +
> >> +                                      out[ch + ch_offset][i] *
> >> j) / count;

^^^ This line needs to be indented some more, to match the previous line.

> >>          }
> >> -        memcpy(s->previous[ch], &out[ch][s->frame_len - s->overlap_len],
> >> -               s->overlap_len * sizeof(*s->previous[ch]));
> >> +        memcpy(s->previous[ch + ch_offset], &out[ch +
> >> ch_offset][s->frame_len - s->overlap_len],
> >> +               s->overlap_len * sizeof(*s->previous[ch + ch_offset]));
> >>      }
> >>
> >>      s->first = 0;
> >> @@ -293,6 +296,7 @@ static int binkaudio_receive_frame(AVCodecContext
> >> *avctx, AVFrame *frame)
> >>      GetBitContext *gb = &s->gb;
> >>      int ret;
> >>
> >> +again:
> >>      if (!s->pkt->data) {
> >>          ret = ff_decode_get_packet(avctx, s->pkt);
> >>          if (ret < 0)
> >> @@ -313,22 +317,31 @@ static int binkaudio_receive_frame(AVCodecContext
> >> *avctx, AVFrame *frame)
> >>      }
> >>
> >>      /* get output buffer */
> >> -    frame->nb_samples = s->frame_len;
> >> -    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> >> -        return ret;
> >> +    if (s->ch_offset == 0) {
> >> +        frame->nb_samples = s->frame_len;
> >> +        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> >> +            return ret;
> >> +    }
> >>
> >>      if (decode_block(s, (float **)frame->extended_data,
> >> -                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT)) {
> >> +                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT,
> >> +                     FFMIN(MAX_CHANNELS, s->channels), s->ch_offset)) {
> >>          av_log(avctx, AV_LOG_ERROR, "Incomplete packet\n");
> >>          return AVERROR_INVALIDDATA;
> >>      }
> >> +    s->ch_offset += MAX_CHANNELS;
> >>      get_bits_align32(gb);
> >>      if (!get_bits_left(gb)) {
> >>          memset(gb, 0, sizeof(*gb));
> >>          av_packet_unref(s->pkt);
> >>      }
> >> +    if (s->ch_offset >= s->channels) {
> >> +        s->ch_offset = 0;
> >> +    } else {
> >> +        goto again;
> >> +    }
> >
> > Is it really intended that the data for one multi-channel frame is
> > divided into several input packets?
> 
> > You are missing the big picture here: files with >2 channels have their
> > channels interleaved across different packets.
> > Something like in XMA. (And nothing signals how they are interleaved,
> > so it's worse than in XMA.) So it is working fine. I just need another
> > look for possible regressions and security implications. Renaming
> > MAX_CHANNELS is not useful as that is not a property of both codecs.

MAX_CHANNELS (2) *is* a property of both codecs, and should be left alone.

I would prefer the '6' magic number be put into a descriptive macro.

LGTM.

-- Peter
(A907 E02F A6E5 0CD2 34CD 20D2 6760 79C5 AC40 DD6B)
diff mbox series

Patch

diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
index b4ff15beeb..54b7e22854 100644
--- a/libavcodec/binkaudio.c
+++ b/libavcodec/binkaudio.c
@@ -51,13 +51,14 @@  typedef struct BinkAudioContext {
     int version_b;          ///< Bink version 'b'
     int first;
     int channels;
+    int ch_offset;
     int frame_len;          ///< transform size (samples)
     int overlap_len;        ///< overlap size (samples)
     int block_size;
     int num_bands;
     float root;
     unsigned int bands[26];
-    float previous[MAX_CHANNELS][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs from previous audio block
+    float previous[6][BINK_BLOCK_MAX_SIZE / 16];  ///< coeffs from previous audio block
     float quant_table[96];
     AVPacket *pkt;
     union {
@@ -74,6 +75,7 @@  static av_cold int decode_init(AVCodecContext *avctx)
     int sample_rate_half;
     int i, ret;
     int frame_len_bits;
+    int max_channels = avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT ? MAX_CHANNELS : 6;
     int channels = avctx->ch_layout.nb_channels;
 
     /* determine frame length */
@@ -85,7 +87,7 @@  static av_cold int decode_init(AVCodecContext *avctx)
         frame_len_bits = 11;
     }
 
-    if (channels < 1 || channels > MAX_CHANNELS) {
+    if (channels < 1 || channels > max_channels) {
         av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n", channels);
         return AVERROR_INVALIDDATA;
     }
@@ -110,7 +112,7 @@  static av_cold int decode_init(AVCodecContext *avctx)
 
     s->frame_len     = 1 << frame_len_bits;
     s->overlap_len   = s->frame_len / 16;
-    s->block_size    = (s->frame_len - s->overlap_len) * s->channels;
+    s->block_size    = (s->frame_len - s->overlap_len) * FFMIN(MAX_CHANNELS, s->channels);
     sample_rate_half = (sample_rate + 1LL) / 2;
     if (avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT)
         s->root = 2.0 / (sqrt(s->frame_len) * 32768.0);
@@ -166,7 +168,8 @@  static const uint8_t rle_length_tab[16] = {
  * @param[out] out Output buffer (must contain s->block_size elements)
  * @return 0 on success, negative error code on failure
  */
-static int decode_block(BinkAudioContext *s, float **out, int use_dct)
+static int decode_block(BinkAudioContext *s, float **out, int use_dct,
+                        int channels, int ch_offset)
 {
     int ch, i, j, k;
     float q, quant[25];
@@ -176,8 +179,8 @@  static int decode_block(BinkAudioContext *s, float **out, int use_dct)
     if (use_dct)
         skip_bits(gb, 2);
 
-    for (ch = 0; ch < s->channels; ch++) {
-        FFTSample *coeffs = out[ch];
+    for (ch = 0; ch < channels; ch++) {
+        FFTSample *coeffs = out[ch + ch_offset];
 
         if (s->version_b) {
             if (get_bits_left(gb) < 64)
@@ -252,17 +255,17 @@  static int decode_block(BinkAudioContext *s, float **out, int use_dct)
             s->trans.rdft.rdft_calc(&s->trans.rdft, coeffs);
     }
 
-    for (ch = 0; ch < s->channels; ch++) {
+    for (ch = 0; ch < channels; ch++) {
         int j;
-        int count = s->overlap_len * s->channels;
+        int count = s->overlap_len * channels;
         if (!s->first) {
             j = ch;
-            for (i = 0; i < s->overlap_len; i++, j += s->channels)
-                out[ch][i] = (s->previous[ch][i] * (count - j) +
-                                      out[ch][i] *          j) / count;
+            for (i = 0; i < s->overlap_len; i++, j += channels)
+                out[ch + ch_offset][i] = (s->previous[ch + ch_offset][i] * (count - j) +
+                                      out[ch + ch_offset][i] *          j) / count;
         }
-        memcpy(s->previous[ch], &out[ch][s->frame_len - s->overlap_len],
-               s->overlap_len * sizeof(*s->previous[ch]));
+        memcpy(s->previous[ch + ch_offset], &out[ch + ch_offset][s->frame_len - s->overlap_len],
+               s->overlap_len * sizeof(*s->previous[ch + ch_offset]));
     }
 
     s->first = 0;
@@ -293,6 +296,7 @@  static int binkaudio_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     GetBitContext *gb = &s->gb;
     int ret;
 
+again:
     if (!s->pkt->data) {
         ret = ff_decode_get_packet(avctx, s->pkt);
         if (ret < 0)
@@ -313,22 +317,31 @@  static int binkaudio_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     }
 
     /* get output buffer */
-    frame->nb_samples = s->frame_len;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
+    if (s->ch_offset == 0) {
+        frame->nb_samples = s->frame_len;
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+            return ret;
+    }
 
     if (decode_block(s, (float **)frame->extended_data,
-                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT)) {
+                     avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT,
+                     FFMIN(MAX_CHANNELS, s->channels), s->ch_offset)) {
         av_log(avctx, AV_LOG_ERROR, "Incomplete packet\n");
         return AVERROR_INVALIDDATA;
     }
+    s->ch_offset += MAX_CHANNELS;
     get_bits_align32(gb);
     if (!get_bits_left(gb)) {
         memset(gb, 0, sizeof(*gb));
         av_packet_unref(s->pkt);
     }
+    if (s->ch_offset >= s->channels) {
+        s->ch_offset = 0;
+    } else {
+        goto again;
+    }
 
-    frame->nb_samples = s->block_size / avctx->ch_layout.nb_channels;
+    frame->nb_samples = s->block_size / FFMIN(avctx->ch_layout.nb_channels, MAX_CHANNELS);
 
     return 0;
 fail:
@@ -343,6 +356,7 @@  static void decode_flush(AVCodecContext *avctx)
     /* s->pkt coincides with avctx->internal->in_pkt
      * and is unreferenced generically when flushing. */
     s->first = 1;
+    s->ch_offset = 0;
 }
 
 const AVCodec ff_binkaudio_rdft_decoder = {