diff mbox series

[FFmpeg-devel,v2,8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

Message ID 20240519165444.829271-9-dev@lynne.ee
State New
Headers show
Series aacdec: add a native xHE-AAC decoder | expand

Checks

Context Check Description
yinshiyou/make_fate_loongarch64 fail Make fate failed
yinshiyou/make_loongarch64 warning New warnings during build
andriy/make_fate_x86 fail Make fate failed
andriy/make_x86 warning New warnings during build

Commit Message

Lynne May 19, 2024, 4:54 p.m. UTC
This commit adds a decoder for the frequency-domain part of USAC.

What works:
 - Mono
 - Stereo (no prediction)
 - Stereo (mid/side coding)
 - Stereo (complex prediction)

What's left:
 - Speech coding

Known issues:
 - Desync with certain sequences
 - Preroll crossover missing (shouldn't matter, bitrate adaptation only)
---
 libavcodec/aac/Makefile              |    3 +-
 libavcodec/aac/aacdec.c              |  188 +--
 libavcodec/aac/aacdec.h              |  187 +++
 libavcodec/aac/aacdec_ac.c           |  208 ++++
 libavcodec/aac/aacdec_ac.h           |   54 +
 libavcodec/aac/aacdec_dsp_template.c |    4 +-
 libavcodec/aac/aacdec_latm.h         |   14 +-
 libavcodec/aac/aacdec_lpd.c          |  198 ++++
 libavcodec/aac/aacdec_lpd.h          |   33 +
 libavcodec/aac/aacdec_usac.c         | 1587 ++++++++++++++++++++++++++
 libavcodec/aac/aacdec_usac.h         |   39 +
 libavcodec/aactab.c                  |   42 +
 libavcodec/aactab.h                  |   10 +
 13 files changed, 2491 insertions(+), 76 deletions(-)
 create mode 100644 libavcodec/aac/aacdec_ac.c
 create mode 100644 libavcodec/aac/aacdec_ac.h
 create mode 100644 libavcodec/aac/aacdec_lpd.c
 create mode 100644 libavcodec/aac/aacdec_lpd.h
 create mode 100644 libavcodec/aac/aacdec_usac.c
 create mode 100644 libavcodec/aac/aacdec_usac.h

Comments

Marton Balint May 19, 2024, 7:39 p.m. UTC | #1
On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:

> This commit adds a decoder for the frequency-domain part of USAC.
>
[...]

>
> +/* Finish later */
> +static const enum AVChannel usac_ch_pos_to_av[64] = {
> +    [0] = AV_CHAN_FRONT_LEFT,
> +    [1] = AV_CHAN_FRONT_RIGHT,
> +    [2] = AV_CHAN_FRONT_CENTER,
> +    [3] = AV_CHAN_LOW_FREQUENCY,
> +    [4] = AV_CHAN_BACK_LEFT, // unsure
> +    [5] = AV_CHAN_BACK_RIGHT, // unsure
> +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
> +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
> +    [8] = 0, /* rear surround left is missing */
> +    [9] = 0, /* rear surround right is missing */
> +    [10] = AV_CHAN_BACK_CENTER,
> +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
> +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
> +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
> +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
> +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
> +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
> +    [17] = AV_CHAN_TOP_FRONT_LEFT,
> +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
> +    [19] = AV_CHAN_TOP_FRONT_CENTER,
> +    [20] = AV_CHAN_TOP_BACK_LEFT,
> +    [21] = AV_CHAN_TOP_BACK_RIGHT,
> +    [22] = AV_CHAN_TOP_BACK_CENTER,
> +    [23] = AV_CHAN_TOP_SIDE_LEFT,
> +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
> +    [25] = AV_CHAN_TOP_CENTER,
> +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
> +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
> +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
> +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
> +    [30] = 0, /* top left surround is missing */
> +    [31] = 0, /* top right surround is missing */
> +};

Some comment would be nice about the source of this table (which 
document, which table).

It looks very similar to the ISO channel positions used in mov_chan. I 
think we follow this mapping in most cases:

Left  Surround is SIDE_LEFT
Right Surround is SIDE_RIGHT
Rear Surround Left  is BACK_LEFT
Rear Surround Right is BACK_RIGHT

So in your table [4] and [5] should be SIDE, [8] and [9] should be 
BACK. [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.

Yes, Left/Right Surround and Left/Right Side Surround will be the same, 
but those are not present in commonly used layouts at the same time.

Regards,
Marton
Lynne May 19, 2024, 7:50 p.m. UTC | #2
On 19/05/2024 21:39, Marton Balint wrote:
> 
> 
> On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
> 
>> This commit adds a decoder for the frequency-domain part of USAC.
>>
> [...]
> 
>>
>> +/* Finish later */
>> +static const enum AVChannel usac_ch_pos_to_av[64] = {
>> +    [0] = AV_CHAN_FRONT_LEFT,
>> +    [1] = AV_CHAN_FRONT_RIGHT,
>> +    [2] = AV_CHAN_FRONT_CENTER,
>> +    [3] = AV_CHAN_LOW_FREQUENCY,
>> +    [4] = AV_CHAN_BACK_LEFT, // unsure
>> +    [5] = AV_CHAN_BACK_RIGHT, // unsure
>> +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
>> +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
>> +    [8] = 0, /* rear surround left is missing */
>> +    [9] = 0, /* rear surround right is missing */
>> +    [10] = AV_CHAN_BACK_CENTER,
>> +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
>> +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
>> +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
>> +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
>> +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
>> +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
>> +    [17] = AV_CHAN_TOP_FRONT_LEFT,
>> +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
>> +    [19] = AV_CHAN_TOP_FRONT_CENTER,
>> +    [20] = AV_CHAN_TOP_BACK_LEFT,
>> +    [21] = AV_CHAN_TOP_BACK_RIGHT,
>> +    [22] = AV_CHAN_TOP_BACK_CENTER,
>> +    [23] = AV_CHAN_TOP_SIDE_LEFT,
>> +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
>> +    [25] = AV_CHAN_TOP_CENTER,
>> +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
>> +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
>> +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
>> +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
>> +    [30] = 0, /* top left surround is missing */
>> +    [31] = 0, /* top right surround is missing */
>> +};
> 
> Some comment would be nice about the source of this table (which 
> document, which table).
> 
> It looks very similar to the ISO channel positons used in mov_chan. I 
> think we follow this mapping in most cases:
> 
> Left  Surround is SIDE_LEFT
> Right Surround is SIDE_RIGHT
> Rear Surround Left  is BACK_LEFT
> Rear Surround Right is BACK_RIGHT
> 
> So in your table [4] and [5] should be SIDE, [8] and [9] should be BACK. 
> [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.
> 
> Yes, Left/Right Surround and Left/Right Side Surround will be the same, 
> but those are not present in commonly used layouts at the same time.
> 
> Regards,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:

0 L left front FL front left
1 R right front FR front right
2 C center front FC front centre
3 LFE low frequency enhancement LFE1 low frequency effects-1
4 Ls left surround LS left surround
5 Rs right surround RS right surround
6 Lc left front center FLc front left centre
7 Rc right front center FRc front right centre
8 Lsr rear surround left BL back left
9 Rsr rear surround right BR back right
10 Cs rear center BC back centre
11 Lsd left surround direct LSd left surround direct
12 Rsd right surround direct RSd right surround direct
13 Lss left side surround SL side left
14 Rss right side surround SR side right
15 Lw left wide front FLw front left wide
16 Rw right wide front FRw front right wide
17 Lv left front vertical height TpFL top front left
18 Rv right front vertical height TpFR top front right
19 Cv center front vertical height TpFC top front centre
20 Lvr left surround vertical height rear TpBL top back left
21 Rvr right surround vertical height rear TpBR top back right
22 Cvr center vertical height rear TpBC top back centre
23 Lvss left vertical height side surround TpSiL top side left
24 Rvss right vertical height side surround TpSiR top side right
25 Ts top center surround TpC top centre
26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
27 Lb left front vertical bottom BtFL bottom front left
28 Rb right front vertical bottom BtFR bottom front right
29 Cb center front vertical bottom BtFC bottom front centre
30 Lvs left vertical height surround TpLS top left surround
31 Rvs right vertical height surround TpRS top right surround

Third field is "Loudspeaker position", last field is "Loudspeaker
position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each prefixed 
with an abbreviation.

I've added the source to the table comment in the code.

I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked earlier.

Thanks.
Michael Niedermayer May 19, 2024, 11:19 p.m. UTC | #3
On Sun, May 19, 2024 at 06:54:44PM +0200, Lynne via ffmpeg-devel wrote:
> This commit adds a decoder for the frequency-domain part of USAC.
> 
> What works:
>  - Mono
>  - Stereo (no prediction)
>  - Stereo (mid/side coding)
>  - Stereo (complex prediction)
> 
> What's left:
>  - Speech coding
> 
> Known issues:
>  - Desync with certain sequences
>  - Preroll crossover missing (shouldn't matter, bitrate adaptation only)
> ---
>  libavcodec/aac/Makefile              |    3 +-
>  libavcodec/aac/aacdec.c              |  188 +--
>  libavcodec/aac/aacdec.h              |  187 +++
>  libavcodec/aac/aacdec_ac.c           |  208 ++++
>  libavcodec/aac/aacdec_ac.h           |   54 +
>  libavcodec/aac/aacdec_dsp_template.c |    4 +-
>  libavcodec/aac/aacdec_latm.h         |   14 +-
>  libavcodec/aac/aacdec_lpd.c          |  198 ++++
>  libavcodec/aac/aacdec_lpd.h          |   33 +
>  libavcodec/aac/aacdec_usac.c         | 1587 ++++++++++++++++++++++++++
>  libavcodec/aac/aacdec_usac.h         |   39 +
>  libavcodec/aactab.c                  |   42 +
>  libavcodec/aactab.h                  |   10 +
>  13 files changed, 2491 insertions(+), 76 deletions(-)
>  create mode 100644 libavcodec/aac/aacdec_ac.c
>  create mode 100644 libavcodec/aac/aacdec_ac.h
>  create mode 100644 libavcodec/aac/aacdec_lpd.c
>  create mode 100644 libavcodec/aac/aacdec_lpd.h
>  create mode 100644 libavcodec/aac/aacdec_usac.c
>  create mode 100644 libavcodec/aac/aacdec_usac.h

seems to break fate

make  -j32 fate-source
TEST    source
--- ./tests/ref/fate/source	2024-05-20 01:14:59.407222202 +0200
+++ tests/data/fate/source	2024-05-20 01:17:32.661142354 +0200
@@ -23,6 +25,7 @@
 compat/djgpp/math.h
 compat/float/float.h
 compat/float/limits.h
+libavcodec/aac/aacdec_ac.h
 libavcodec/bitstream_template.h
 tools/decode_simple.h
 Use of av_clip() where av_clip_uintp2() could be used:
Test source failed. Look at tests/data/fate/source.err for details.
make: *** [tests/Makefile:311: fate-source] Error 1

thx

[...]
Lynne May 20, 2024, 1:11 a.m. UTC | #4
On 20/05/2024 01:19, Michael Niedermayer wrote:
> On Sun, May 19, 2024 at 06:54:44PM +0200, Lynne via ffmpeg-devel wrote:
>> This commit adds a decoder for the frequency-domain part of USAC.
>>
>> What works:
>>   - Mono
>>   - Stereo (no prediction)
>>   - Stereo (mid/side coding)
>>   - Stereo (complex prediction)
>>
>> What's left:
>>   - Speech coding
>>
>> Known issues:
>>   - Desync with certain sequences
>>   - Preroll crossover missing (shouldn't matter, bitrate adaptation only)
>> ---
>>   libavcodec/aac/Makefile              |    3 +-
>>   libavcodec/aac/aacdec.c              |  188 +--
>>   libavcodec/aac/aacdec.h              |  187 +++
>>   libavcodec/aac/aacdec_ac.c           |  208 ++++
>>   libavcodec/aac/aacdec_ac.h           |   54 +
>>   libavcodec/aac/aacdec_dsp_template.c |    4 +-
>>   libavcodec/aac/aacdec_latm.h         |   14 +-
>>   libavcodec/aac/aacdec_lpd.c          |  198 ++++
>>   libavcodec/aac/aacdec_lpd.h          |   33 +
>>   libavcodec/aac/aacdec_usac.c         | 1587 ++++++++++++++++++++++++++
>>   libavcodec/aac/aacdec_usac.h         |   39 +
>>   libavcodec/aactab.c                  |   42 +
>>   libavcodec/aactab.h                  |   10 +
>>   13 files changed, 2491 insertions(+), 76 deletions(-)
>>   create mode 100644 libavcodec/aac/aacdec_ac.c
>>   create mode 100644 libavcodec/aac/aacdec_ac.h
>>   create mode 100644 libavcodec/aac/aacdec_lpd.c
>>   create mode 100644 libavcodec/aac/aacdec_lpd.h
>>   create mode 100644 libavcodec/aac/aacdec_usac.c
>>   create mode 100644 libavcodec/aac/aacdec_usac.h
> 
> seems to break fate
> 
> make  -j32 fate-source
> TEST    source
> --- ./tests/ref/fate/source	2024-05-20 01:14:59.407222202 +0200
> +++ tests/data/fate/source	2024-05-20 01:17:32.661142354 +0200
> @@ -23,6 +25,7 @@
>   compat/djgpp/math.h
>   compat/float/float.h
>   compat/float/limits.h
> +libavcodec/aac/aacdec_ac.h
>   libavcodec/bitstream_template.h
>   tools/decode_simple.h
>   Use of av_clip() where av_clip_uintp2() could be used:
> Test source failed. Look at tests/data/fate/source.err for details.
> make: *** [tests/Makefile:311: fate-source] Error 1
> 
> thx
> 
> [...]
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Fixed:
-#ifndef AVCODEC_AACDEC_AC_H
-#define AVCODEC_AACDEC_AC_H
+#ifndef AVCODEC_AAC_AACDEC_AC_H
+#define AVCODEC_AAC_AACDEC_AC_H

Thanks
Marton Balint May 21, 2024, 7:16 a.m. UTC | #5
On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:

> On 19/05/2024 21:39, Marton Balint wrote:
>>
>>
>>  On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>
>>>  This commit adds a decoder for the frequency-domain part of USAC.
>>>
>>  [...]
>> 
>>>
>>>  +/* Finish later */
>>>  +static const enum AVChannel usac_ch_pos_to_av[64] = {
>>>  +    [0] = AV_CHAN_FRONT_LEFT,
>>>  +    [1] = AV_CHAN_FRONT_RIGHT,
>>>  +    [2] = AV_CHAN_FRONT_CENTER,
>>>  +    [3] = AV_CHAN_LOW_FREQUENCY,
>>>  +    [4] = AV_CHAN_BACK_LEFT, // unsure
>>>  +    [5] = AV_CHAN_BACK_RIGHT, // unsure
>>>  +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
>>>  +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
>>>  +    [8] = 0, /* rear surround left is missing */
>>>  +    [9] = 0, /* rear surround right is missing */
>>>  +    [10] = AV_CHAN_BACK_CENTER,
>>>  +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
>>>  +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
>>>  +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
>>>  +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
>>>  +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
>>>  +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
>>>  +    [17] = AV_CHAN_TOP_FRONT_LEFT,
>>>  +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
>>>  +    [19] = AV_CHAN_TOP_FRONT_CENTER,
>>>  +    [20] = AV_CHAN_TOP_BACK_LEFT,
>>>  +    [21] = AV_CHAN_TOP_BACK_RIGHT,
>>>  +    [22] = AV_CHAN_TOP_BACK_CENTER,
>>>  +    [23] = AV_CHAN_TOP_SIDE_LEFT,
>>>  +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
>>>  +    [25] = AV_CHAN_TOP_CENTER,
>>>  +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
>>>  +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
>>>  +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
>>>  +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
>>>  +    [30] = 0, /* top left surround is missing */
>>>  +    [31] = 0, /* top right surround is missing */
>>>  +};
>>
>>  Some comment would be nice about the source of this table (which document,
>>  which table).
>>
>>  It looks very similar to the ISO channel positons used in mov_chan. I
>>  think we follow this mapping in most cases:
>>
>>  Left  Surround is SIDE_LEFT
>>  Right Surround is SIDE_RIGHT
>>  Rear Surround Left  is BACK_LEFT
>>  Rear Surround Right is BACK_RIGHT
>>
>>  So in your table [4] and [5] should be SIDE, [8] and [9] should be BACK.
>>  [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.
>>
>>  Yes, Left/Right Surround and Left/Right Side Surround will be the same,
>>  but those are not present in commonly used layouts at the same time.
>>
>>  Regards,
>>  Marton
>>  _______________________________________________
>>  ffmpeg-devel mailing list
>>  ffmpeg-devel@ffmpeg.org
>>  https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>>  To unsubscribe, visit link above, or email
>>  ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
> Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:
>
> 0 L left front FL front left
> 1 R right front FR front right
> 2 C center front FC front centre
> 3 LFE low frequency enhancement LFE1 low frequency effects-1
> 4 Ls left surround LS left surround
> 5 Rs right surround RS right surround
> 6 Lc left front center FLc front left centre
> 7 Rc right front center FRc front right centre
> 8 Lsr rear surround left BL back left
> 9 Rsr rear surround right BR back right
> 10 Cs rear center BC back centre
> 11 Lsd left surround direct LSd left surround direct
> 12 Rsd right surround direct RSd right surround direct
> 13 Lss left side surround SL side left
> 14 Rss right side surround SR side right
> 15 Lw left wide front FLw front left wide
> 16 Rw right wide front FRw front right wide
> 17 Lv left front vertical height TpFL top front left
> 18 Rv right front vertical height TpFR top front right
> 19 Cv center front vertical height TpFC top front centre
> 20 Lvr left surround vertical height rear TpBL top back left
> 21 Rvr right surround vertical height rear TpBR top back right
> 22 Cvr center vertical height rear TpBC top back centre
> 23 Lvss left vertical height side surround TpSiL top side left
> 24 Rvss right vertical height side surround TpSiR top side right
> 25 Ts top center surround TpC top centre
> 26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
> 27 Lb left front vertical bottom BtFL bottom front left
> 28 Rb right front vertical bottom BtFR bottom front right
> 29 Cb center front vertical bottom BtFC bottom front centre
> 30 Lvs left vertical height surround TpLS top left surround
> 31 Rvs right vertical height surround TpRS top right surround
>
> Third field is "Loudspeaker position", last field is "Loudspeaker
> position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each prefixed with 
> an abbreviation.
>
> I've added the source to the table comment in the code.
>
> I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked earlier.

Thanks. Later in the code when you actually use this I can see that you 
are creating a native layout:

> +    channel_config_idx = get_bits(gb, 5); /* channelConfigurationIndex */
> +    if (!channel_config_idx) {
> +        /* UsacChannelConfig() */
> +        uint8_t channel_pos[64];
> +        uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /* numOutChannels */
> +        if (nb_channels >= 64)
> +            return AVERROR(EINVAL);
> +
> +        av_channel_layout_uninit(&ac->oc[1].ch_layout);
> +        for (int i = 0; i < nb_channels; i++)
> +            channel_pos[i] = get_bits(gb, 5); /* bsOutputChannelPos */
> +
> +        ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
> +        ac->oc[1].ch_layout.nb_channels = nb_channels;
> +        ac->oc[1].ch_layout.u.mask = 0;
> +
> +        for (int i = 0; i < nb_channels; i++)
> +            ac->oc[1].ch_layout.u.mask |= 1 << usac_ch_pos_to_av[channel_pos[i]];
> +
> +        av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
> +    } else {

Probably you should create a custom layout here, because the channels are 
not necessarily in native order. We already have a relatively simple way to 
do that and to fall back to native layouts if possible; here is an example 
copied from mov_chan:

ret = av_channel_layout_custom_init(ch_layout, channels);
if (ret < 0)
     return ret;
for (i = 0; i < channels; i++) {
     enum AVChannel id = layout_map[i].id;
     ch_layout->u.map[i].id = (id != AV_CHAN_NONE ? id : AV_CHAN_UNKNOWN);
}
return av_channel_layout_retype(ch_layout, 0, AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);

So you should adapt this accordingly for aac.

Regards,
Marton
Lynne May 21, 2024, 5:58 p.m. UTC | #6
On 21/05/2024 09:16, Marton Balint wrote:
> 
> 
> On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
> 
>> On 19/05/2024 21:39, Marton Balint wrote:
>>>
>>>
>>>  On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>>
>>>>  This commit adds a decoder for the frequency-domain part of USAC.
>>>>
>>>  [...]
>>>
>>>>
>>>>  +/* Finish later */
>>>>  +static const enum AVChannel usac_ch_pos_to_av[64] = {
>>>>  +    [0] = AV_CHAN_FRONT_LEFT,
>>>>  +    [1] = AV_CHAN_FRONT_RIGHT,
>>>>  +    [2] = AV_CHAN_FRONT_CENTER,
>>>>  +    [3] = AV_CHAN_LOW_FREQUENCY,
>>>>  +    [4] = AV_CHAN_BACK_LEFT, // unsure
>>>>  +    [5] = AV_CHAN_BACK_RIGHT, // unsure
>>>>  +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
>>>>  +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
>>>>  +    [8] = 0, /* rear surround left is missing */
>>>>  +    [9] = 0, /* rear surround right is missing */
>>>>  +    [10] = AV_CHAN_BACK_CENTER,
>>>>  +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
>>>>  +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
>>>>  +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
>>>>  +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
>>>>  +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
>>>>  +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
>>>>  +    [17] = AV_CHAN_TOP_FRONT_LEFT,
>>>>  +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
>>>>  +    [19] = AV_CHAN_TOP_FRONT_CENTER,
>>>>  +    [20] = AV_CHAN_TOP_BACK_LEFT,
>>>>  +    [21] = AV_CHAN_TOP_BACK_RIGHT,
>>>>  +    [22] = AV_CHAN_TOP_BACK_CENTER,
>>>>  +    [23] = AV_CHAN_TOP_SIDE_LEFT,
>>>>  +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
>>>>  +    [25] = AV_CHAN_TOP_CENTER,
>>>>  +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
>>>>  +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
>>>>  +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
>>>>  +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
>>>>  +    [30] = 0, /* top left surround is missing */
>>>>  +    [31] = 0, /* top right surround is missing */
>>>>  +};
>>>
>>>  Some comment would be nice about the source of this table (which 
>>> document,
>>>  which table).
>>>
>>>  It looks very similar to the ISO channel positons used in mov_chan. I
>>>  think we follow this mapping in most cases:
>>>
>>>  Left  Surround is SIDE_LEFT
>>>  Right Surround is SIDE_RIGHT
>>>  Rear Surround Left  is BACK_LEFT
>>>  Rear Surround Right is BACK_RIGHT
>>>
>>>  So in your table [4] and [5] should be SIDE, [8] and [9] should be 
>>> BACK.
>>>  [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.
>>>
>>>  Yes, Left/Right Surround and Left/Right Side Surround will be the same,
>>>  but those are not present in commonly used layouts at the same time.
>>>
>>>  Regards,
>>>  Marton
>>>  _______________________________________________
>>>  ffmpeg-devel mailing list
>>>  ffmpeg-devel@ffmpeg.org
>>>  https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>>  To unsubscribe, visit link above, or email
>>>  ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>> Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:
>>
>> 0 L left front FL front left
>> 1 R right front FR front right
>> 2 C center front FC front centre
>> 3 LFE low frequency enhancement LFE1 low frequency effects-1
>> 4 Ls left surround LS left surround
>> 5 Rs right surround RS right surround
>> 6 Lc left front center FLc front left centre
>> 7 Rc right front center FRc front right centre
>> 8 Lsr rear surround left BL back left
>> 9 Rsr rear surround right BR back right
>> 10 Cs rear center BC back centre
>> 11 Lsd left surround direct LSd left surround direct
>> 12 Rsd right surround direct RSd right surround direct
>> 13 Lss left side surround SL side left
>> 14 Rss right side surround SR side right
>> 15 Lw left wide front FLw front left wide
>> 16 Rw right wide front FRw front right wide
>> 17 Lv left front vertical height TpFL top front left
>> 18 Rv right front vertical height TpFR top front right
>> 19 Cv center front vertical height TpFC top front centre
>> 20 Lvr left surround vertical height rear TpBL top back left
>> 21 Rvr right surround vertical height rear TpBR top back right
>> 22 Cvr center vertical height rear TpBC top back centre
>> 23 Lvss left vertical height side surround TpSiL top side left
>> 24 Rvss right vertical height side surround TpSiR top side right
>> 25 Ts top center surround TpC top centre
>> 26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
>> 27 Lb left front vertical bottom BtFL bottom front left
>> 28 Rb right front vertical bottom BtFR bottom front right
>> 29 Cb center front vertical bottom BtFC bottom front centre
>> 30 Lvs left vertical height surround TpLS top left surround
>> 31 Rvs right vertical height surround TpRS top right surround
>>
>> Third field is "Loudspeaker position", last field is "Loudspeaker
>> position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each 
>> prefixed with an abbreviation.
>>
>> I've added the source to the table comment in the code.
>>
>> I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked 
>> earlier.
> 
> Thanks. Later in the code when you actually use this I can see that you 
> are creating a native layout:
> 
>> +    channel_config_idx = get_bits(gb, 5); /* 
>> channelConfigurationIndex */
>> +    if (!channel_config_idx) {
>> +        /* UsacChannelConfig() */
>> +        uint8_t channel_pos[64];
>> +        uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /* 
>> numOutChannels */
>> +        if (nb_channels >= 64)
>> +            return AVERROR(EINVAL);
>> +
>> +        av_channel_layout_uninit(&ac->oc[1].ch_layout);
>> +        for (int i = 0; i < nb_channels; i++)
>> +            channel_pos[i] = get_bits(gb, 5); /* bsOutputChannelPos */
>> +
>> +        ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
>> +        ac->oc[1].ch_layout.nb_channels = nb_channels;
>> +        ac->oc[1].ch_layout.u.mask = 0;
>> +
>> +        for (int i = 0; i < nb_channels; i++)
>> +            ac->oc[1].ch_layout.u.mask |= 1 << 
>> usac_ch_pos_to_av[channel_pos[i]];
>> +
>> +        av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
>> +    } else {
> 
> Probably you should create a custom layout here, because the channels 
> are not necessary in native order. We already have a relatively simple 
> way to do that and to fall back to native layouts if possible, here is 
> an example copied from mov_chan:
> 
> ret = av_channel_layout_custom_init(ch_layout, channels);
> if (ret < 0)
>      return ret;
> for (i = 0; i < channels; i++) {
>      enum AVChannel id = layout_map[i].id;
>      ch_layout->u.map[i].id = (id != AV_CHAN_NONE ? id : AV_CHAN_UNKNOWN);
> }
> return av_channel_layout_retype(ch_layout, 0, 
> AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
> 
> So you should adapt this accodingly to aac.
> 
> Regards,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

According to the spec:

 > In case of multiple channel elements the index i of
 > bsOutputChannelPos[i] indicates the position in which the channel
 > appears in the bitstream.

So the channels will always be in native order, as far as I understand.
Marton Balint May 21, 2024, 7:40 p.m. UTC | #7
On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:

> On 21/05/2024 09:16, Marton Balint wrote:
>>
>>
>>  On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>
>>>  On 19/05/2024 21:39, Marton Balint wrote:
>>>> 
>>>>
>>>>   On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>>>
>>>>>   This commit adds a decoder for the frequency-domain part of USAC.
>>>>>
>>>>   [...]
>>>> 
>>>>>
>>>>>   +/* Finish later */
>>>>>   +static const enum AVChannel usac_ch_pos_to_av[64] = {
>>>>>   +    [0] = AV_CHAN_FRONT_LEFT,
>>>>>   +    [1] = AV_CHAN_FRONT_RIGHT,
>>>>>   +    [2] = AV_CHAN_FRONT_CENTER,
>>>>>   +    [3] = AV_CHAN_LOW_FREQUENCY,
>>>>>   +    [4] = AV_CHAN_BACK_LEFT, // unsure
>>>>>   +    [5] = AV_CHAN_BACK_RIGHT, // unsure
>>>>>   +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
>>>>>   +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
>>>>>   +    [8] = 0, /* rear surround left is missing */
>>>>>   +    [9] = 0, /* rear surround right is missing */
>>>>>   +    [10] = AV_CHAN_BACK_CENTER,
>>>>>   +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
>>>>>   +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
>>>>>   +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
>>>>>   +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
>>>>>   +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
>>>>>   +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
>>>>>   +    [17] = AV_CHAN_TOP_FRONT_LEFT,
>>>>>   +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
>>>>>   +    [19] = AV_CHAN_TOP_FRONT_CENTER,
>>>>>   +    [20] = AV_CHAN_TOP_BACK_LEFT,
>>>>>   +    [21] = AV_CHAN_TOP_BACK_RIGHT,
>>>>>   +    [22] = AV_CHAN_TOP_BACK_CENTER,
>>>>>   +    [23] = AV_CHAN_TOP_SIDE_LEFT,
>>>>>   +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
>>>>>   +    [25] = AV_CHAN_TOP_CENTER,
>>>>>   +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
>>>>>   +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
>>>>>   +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
>>>>>   +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
>>>>>   +    [30] = 0, /* top left surround is missing */
>>>>>   +    [31] = 0, /* top right surround is missing */
>>>>>   +};
>>>>
>>>>   Some comment would be nice about the source of this table (which
>>>>  document,
>>>>   which table).
>>>>
>>>>   It looks very similar to the ISO channel positons used in mov_chan. I
>>>>   think we follow this mapping in most cases:
>>>>
>>>>   Left  Surround is SIDE_LEFT
>>>>   Right Surround is SIDE_RIGHT
>>>>   Rear Surround Left  is BACK_LEFT
>>>>   Rear Surround Right is BACK_RIGHT
>>>>
>>>>   So in your table [4] and [5] should be SIDE, [8] and [9] should be
>>>>  BACK.
>>>>   [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.
>>>>
>>>>   Yes, Left/Right Surround and Left/Right Side Surround will be the same,
>>>>   but those are not present in commonly used layouts at the same time.
>>>>
>>>>   Regards,
>>>>   Marton
>>>>   _______________________________________________
>>>>   ffmpeg-devel mailing list
>>>>   ffmpeg-devel@ffmpeg.org
>>>>   https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>
>>>>   To unsubscribe, visit link above, or email
>>>>   ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>
>>>  Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:
>>>
>>>  0 L left front FL front left
>>>  1 R right front FR front right
>>>  2 C center front FC front centre
>>>  3 LFE low frequency enhancement LFE1 low frequency effects-1
>>>  4 Ls left surround LS left surround
>>>  5 Rs right surround RS right surround
>>>  6 Lc left front center FLc front left centre
>>>  7 Rc right front center FRc front right centre
>>>  8 Lsr rear surround left BL back left
>>>  9 Rsr rear surround right BR back right
>>>  10 Cs rear center BC back centre
>>>  11 Lsd left surround direct LSd left surround direct
>>>  12 Rsd right surround direct RSd right surround direct
>>>  13 Lss left side surround SL side left
>>>  14 Rss right side surround SR side right
>>>  15 Lw left wide front FLw front left wide
>>>  16 Rw right wide front FRw front right wide
>>>  17 Lv left front vertical height TpFL top front left
>>>  18 Rv right front vertical height TpFR top front right
>>>  19 Cv center front vertical height TpFC top front centre
>>>  20 Lvr left surround vertical height rear TpBL top back left
>>>  21 Rvr right surround vertical height rear TpBR top back right
>>>  22 Cvr center vertical height rear TpBC top back centre
>>>  23 Lvss left vertical height side surround TpSiL top side left
>>>  24 Rvss right vertical height side surround TpSiR top side right
>>>  25 Ts top center surround TpC top centre
>>>  26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
>>>  27 Lb left front vertical bottom BtFL bottom front left
>>>  28 Rb right front vertical bottom BtFR bottom front right
>>>  29 Cb center front vertical bottom BtFC bottom front centre
>>>  30 Lvs left vertical height surround TpLS top left surround
>>>  31 Rvs right vertical height surround TpRS top right surround
>>>
>>>  Third field is "Loudspeaker position", last field is "Loudspeaker
>>>  position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each prefixed
>>>  with an abbreviation.
>>>
>>>  I've added the source to the table comment in the code.
>>>
>>>  I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked
>>>  earlier.
>>
>>  Thanks. Later in the code when you actually use this I can see that you
>>  are creating a native layout:
>>
>>>  +    channel_config_idx = get_bits(gb, 5); /* channelConfigurationIndex
>>>  */
>>>  +    if (!channel_config_idx) {
>>>  +        /* UsacChannelConfig() */
>>>  +        uint8_t channel_pos[64];
>>>  +        uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /*
>>>  numOutChannels */
>>>  +        if (nb_channels >= 64)
>>>  +            return AVERROR(EINVAL);
>>>  +
>>>  +        av_channel_layout_uninit(&ac->oc[1].ch_layout);
>>>  +        for (int i = 0; i < nb_channels; i++)
>>>  +            channel_pos[i] = get_bits(gb, 5); /* bsOutputChannelPos */
>>>  +
>>>  +        ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
>>>  +        ac->oc[1].ch_layout.nb_channels = nb_channels;
>>>  +        ac->oc[1].ch_layout.u.mask = 0;
>>>  +
>>>  +        for (int i = 0; i < nb_channels; i++)
>>>  +            ac->oc[1].ch_layout.u.mask |= 1 <<
>>>  usac_ch_pos_to_av[channel_pos[i]];
>>>  +
>>>  +        av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
>>>  +    } else {
>>
>>  Probably you should create a custom layout here, because the channels are
>>  not necessarily in native order. We already have a relatively simple way to
>>  do that and to fall back to native layouts if possible, here is an example
>>  copied from mov_chan:
>>
>>  ret = av_channel_layout_custom_init(ch_layout, channels);
>>  if (ret < 0)
>>      return ret;
>>  for (i = 0; i < channels; i++) {
>>       enum AVChannel id = layout_map[i].id;
>>       ch_layout->u.map[i].id = (id != AV_CHAN_NONE ? id : AV_CHAN_UNKNOWN);
>> }
>>  return av_channel_layout_retype(ch_layout, 0,
>>  AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
>>
>>  So you should adapt this accordingly to aac.
>>
>>  Regards,
>>  Marton
>>  _______________________________________________
>>  ffmpeg-devel mailing list
>>  ffmpeg-devel@ffmpeg.org
>>  https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>>  To unsubscribe, visit link above, or email
>>  ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
> According to the spec:
>
>>  In case of multiple channel elements the index i of
>>  bsOutputChannelPos[i] indicates the position in which the channel
>>  appears in the bitstream.
>
> So the channels will always be in native order, as far as I understand.
>

AV_CHANNEL_ORDER_NATIVE expects channels in the order of the channel IDs 
in the AVChannel enum. Surely that is not the case here, unless the 
decoder reorders the channels. Or what if an output position appears 
multiple times?

Regards,
Marton
Lynne May 21, 2024, 7:52 p.m. UTC | #8
On 21/05/2024 21:40, Marton Balint wrote:
> 
> 
> On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:
> 
>> On 21/05/2024 09:16, Marton Balint wrote:
>>>
>>>
>>>  On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>>
>>>>  On 19/05/2024 21:39, Marton Balint wrote:
>>>>>
>>>>>
>>>>>   On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>>>>
>>>>>>   This commit adds a decoder for the frequency-domain part of USAC.
>>>>>>
>>>>>   [...]
>>>>>
>>>>>>
>>>>>>   +/* Finish later */
>>>>>>   +static const enum AVChannel usac_ch_pos_to_av[64] = {
>>>>>>   +    [0] = AV_CHAN_FRONT_LEFT,
>>>>>>   +    [1] = AV_CHAN_FRONT_RIGHT,
>>>>>>   +    [2] = AV_CHAN_FRONT_CENTER,
>>>>>>   +    [3] = AV_CHAN_LOW_FREQUENCY,
>>>>>>   +    [4] = AV_CHAN_BACK_LEFT, // unsure
>>>>>>   +    [5] = AV_CHAN_BACK_RIGHT, // unsure
>>>>>>   +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
>>>>>>   +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
>>>>>>   +    [8] = 0, /* rear surround left is missing */
>>>>>>   +    [9] = 0, /* rear surround right is missing */
>>>>>>   +    [10] = AV_CHAN_BACK_CENTER,
>>>>>>   +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
>>>>>>   +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
>>>>>>   +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
>>>>>>   +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
>>>>>>   +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
>>>>>>   +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
>>>>>>   +    [17] = AV_CHAN_TOP_FRONT_LEFT,
>>>>>>   +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
>>>>>>   +    [19] = AV_CHAN_TOP_FRONT_CENTER,
>>>>>>   +    [20] = AV_CHAN_TOP_BACK_LEFT,
>>>>>>   +    [21] = AV_CHAN_TOP_BACK_RIGHT,
>>>>>>   +    [22] = AV_CHAN_TOP_BACK_CENTER,
>>>>>>   +    [23] = AV_CHAN_TOP_SIDE_LEFT,
>>>>>>   +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
>>>>>>   +    [25] = AV_CHAN_TOP_CENTER,
>>>>>>   +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
>>>>>>   +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
>>>>>>   +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
>>>>>>   +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
>>>>>>   +    [30] = 0, /* top left surround is missing */
>>>>>>   +    [31] = 0, /* top right surround is missing */
>>>>>>   +};
>>>>>
>>>>>   Some comment would be nice about the source of this table (which
>>>>>  document,
>>>>>   which table).
>>>>>
>>>>>   It looks very similar to the ISO channel positons used in 
>>>>> mov_chan. I
>>>>>   think we follow this mapping in most cases:
>>>>>
>>>>>   Left  Surround is SIDE_LEFT
>>>>>   Right Surround is SIDE_RIGHT
>>>>>   Rear Surround Left  is BACK_LEFT
>>>>>   Rear Surround Right is BACK_RIGHT
>>>>>
>>>>>   So in your table [4] and [5] should be SIDE, [8] and [9] should be
>>>>>  BACK.
>>>>>   [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.
>>>>>
>>>>>   Yes, Left/Right Surround and Left/Right Side Surround will be the 
>>>>> same,
>>>>>   but those are not present in commonly used layouts at the same time.
>>>>>
>>>>>   Regards,
>>>>>   Marton
>>>>>   _______________________________________________
>>>>>   ffmpeg-devel mailing list
>>>>>   ffmpeg-devel@ffmpeg.org
>>>>>   https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>>
>>>>>   To unsubscribe, visit link above, or email
>>>>>   ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>>
>>>>  Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:
>>>>
>>>>  0 L left front FL front left
>>>>  1 R right front FR front right
>>>>  2 C center front FC front centre
>>>>  3 LFE low frequency enhancement LFE1 low frequency effects-1
>>>>  4 Ls left surround LS left surround
>>>>  5 Rs right surround RS right surround
>>>>  6 Lc left front center FLc front left centre
>>>>  7 Rc right front center FRc front right centre
>>>>  8 Lsr rear surround left BL back left
>>>>  9 Rsr rear surround right BR back right
>>>>  10 Cs rear center BC back centre
>>>>  11 Lsd left surround direct LSd left surround direct
>>>>  12 Rsd right surround direct RSd right surround direct
>>>>  13 Lss left side surround SL side left
>>>>  14 Rss right side surround SR side right
>>>>  15 Lw left wide front FLw front left wide
>>>>  16 Rw right wide front FRw front right wide
>>>>  17 Lv left front vertical height TpFL top front left
>>>>  18 Rv right front vertical height TpFR top front right
>>>>  19 Cv center front vertical height TpFC top front centre
>>>>  20 Lvr left surround vertical height rear TpBL top back left
>>>>  21 Rvr right surround vertical height rear TpBR top back right
>>>>  22 Cvr center vertical height rear TpBC top back centre
>>>>  23 Lvss left vertical height side surround TpSiL top side left
>>>>  24 Rvss right vertical height side surround TpSiR top side right
>>>>  25 Ts top center surround TpC top centre
>>>>  26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
>>>>  27 Lb left front vertical bottom BtFL bottom front left
>>>>  28 Rb right front vertical bottom BtFR bottom front right
>>>>  29 Cb center front vertical bottom BtFC bottom front centre
>>>>  30 Lvs left vertical height surround TpLS top left surround
>>>>  31 Rvs right vertical height surround TpRS top right surround
>>>>
>>>>  Third field is "Loudspeaker position", last field is "Loudspeaker
>>>>  position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each 
>>>> prefixed
>>>>  with an abbreviation.
>>>>
>>>>  I've added the source to the table comment in the code.
>>>>
>>>>  I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked
>>>>  earlier.
>>>
>>>  Thanks. Later in the code when you actually use this I can see that you
>>>  are creating a native layout:
>>>
>>>>  +    channel_config_idx = get_bits(gb, 5); /* 
>>>> channelConfigurationIndex
>>>>  */
>>>>  +    if (!channel_config_idx) {
>>>>  +        /* UsacChannelConfig() */
>>>>  +        uint8_t channel_pos[64];
>>>>  +        uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /*
>>>>  numOutChannels */
>>>>  +        if (nb_channels >= 64)
>>>>  +            return AVERROR(EINVAL);
>>>>  +
>>>>  +        av_channel_layout_uninit(&ac->oc[1].ch_layout);
>>>>  +        for (int i = 0; i < nb_channels; i++)
>>>>  +            channel_pos[i] = get_bits(gb, 5); /* 
>>>> bsOutputChannelPos */
>>>>  +
>>>>  +        ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
>>>>  +        ac->oc[1].ch_layout.nb_channels = nb_channels;
>>>>  +        ac->oc[1].ch_layout.u.mask = 0;
>>>>  +
>>>>  +        for (int i = 0; i < nb_channels; i++)
>>>>  +            ac->oc[1].ch_layout.u.mask |= 1 <<
>>>>  usac_ch_pos_to_av[channel_pos[i]];
>>>>  +
>>>>  +        av_channel_layout_copy(&avctx->ch_layout, 
>>>> &ac->oc[1].ch_layout);
>>>>  +    } else {
>>>
>>>  Probably you should create a custom layout here, because the 
>>> channels are
>>>  not necessarily in native order. We already have a relatively simple 
>>> way to
>>>  do that and to fall back to native layouts if possible, here is an 
>>> example
>>>  copied from mov_chan:
>>>
>>>  ret = av_channel_layout_custom_init(ch_layout, channels);
>>>  if (ret < 0)
>>>      return ret;
>>>  for (i = 0; i < channels; i++) {
>>>       enum AVChannel id = layout_map[i].id;
>>>       ch_layout->u.map[i].id = (id != AV_CHAN_NONE ? id : 
>>> AV_CHAN_UNKNOWN);
>>> }
>>>  return av_channel_layout_retype(ch_layout, 0,
>>>  AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
>>>
>>>  So you should adapt this accordingly to aac.
>>>
>>>  Regards,
>>>  Marton
>>>  _______________________________________________
>>>  ffmpeg-devel mailing list
>>>  ffmpeg-devel@ffmpeg.org
>>>  https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>>  To unsubscribe, visit link above, or email
>>>  ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>> According to the spec:
>>
>>>  In case of multiple channel elements the index i of
>>>  bsOutputChannelPos[i] indicates the position in which the channel
>>>  appears in the bitstream.
>>
>> So the channels will always be in native order, as far as I understand.
>>
> 
> AV_CHANNEL_ORDER_NATIVE expects channels in the order of the channel IDs 
> in the AVChannel enum. Surely that is not the case here, unless the 
> decoder reorders the channels. Or what if an output position appears 
> multiple times?
> 
> Regards,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".


It should be the case here, we shouldn't need reordering as NATIVE just 
lets you specify what order the elements appear in the bitstream.
Marton Balint May 21, 2024, 8:12 p.m. UTC | #9
On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:

> On 21/05/2024 21:40, Marton Balint wrote:
>>
>>
>>  On Tue, 21 May 2024, Lynne via ffmpeg-devel wrote:
>>
>>>  On 21/05/2024 09:16, Marton Balint wrote:
>>>> 
>>>>
>>>>   On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>>>
>>>>>   On 19/05/2024 21:39, Marton Balint wrote:
>>>>>> 
>>>>>>
>>>>>>    On Sun, 19 May 2024, Lynne via ffmpeg-devel wrote:
>>>>>>
>>>>>>>    This commit adds a decoder for the frequency-domain part of USAC.
>>>>>>>
>>>>>>    [...]
>>>>>> 
>>>>>>>
>>>>>>>    +/* Finish later */
>>>>>>>    +static const enum AVChannel usac_ch_pos_to_av[64] = {
>>>>>>>    +    [0] = AV_CHAN_FRONT_LEFT,
>>>>>>>    +    [1] = AV_CHAN_FRONT_RIGHT,
>>>>>>>    +    [2] = AV_CHAN_FRONT_CENTER,
>>>>>>>    +    [3] = AV_CHAN_LOW_FREQUENCY,
>>>>>>>    +    [4] = AV_CHAN_BACK_LEFT, // unsure
>>>>>>>    +    [5] = AV_CHAN_BACK_RIGHT, // unsure
>>>>>>>    +    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
>>>>>>>    +    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
>>>>>>>    +    [8] = 0, /* rear surround left is missing */
>>>>>>>    +    [9] = 0, /* rear surround right is missing */
>>>>>>>    +    [10] = AV_CHAN_BACK_CENTER,
>>>>>>>    +    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
>>>>>>>    +    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
>>>>>>>    +    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
>>>>>>>    +    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
>>>>>>>    +    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
>>>>>>>    +    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
>>>>>>>    +    [17] = AV_CHAN_TOP_FRONT_LEFT,
>>>>>>>    +    [18] = AV_CHAN_TOP_FRONT_RIGHT,
>>>>>>>    +    [19] = AV_CHAN_TOP_FRONT_CENTER,
>>>>>>>    +    [20] = AV_CHAN_TOP_BACK_LEFT,
>>>>>>>    +    [21] = AV_CHAN_TOP_BACK_RIGHT,
>>>>>>>    +    [22] = AV_CHAN_TOP_BACK_CENTER,
>>>>>>>    +    [23] = AV_CHAN_TOP_SIDE_LEFT,
>>>>>>>    +    [24] = AV_CHAN_TOP_SIDE_RIGHT,
>>>>>>>    +    [25] = AV_CHAN_TOP_CENTER,
>>>>>>>    +    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
>>>>>>>    +    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
>>>>>>>    +    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
>>>>>>>    +    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
>>>>>>>    +    [30] = 0, /* top left surround is missing */
>>>>>>>    +    [31] = 0, /* top right surround is missing */
>>>>>>>    +};
>>>>>>
>>>>>>    Some comment would be nice about the source of this table (which
>>>>>>   document,
>>>>>>    which table).
>>>>>>
>>>>>>    It looks very similar to the ISO channel positons used in mov_chan.
>>>>>>  I
>>>>>>    think we follow this mapping in most cases:
>>>>>>
>>>>>>    Left  Surround is SIDE_LEFT
>>>>>>    Right Surround is SIDE_RIGHT
>>>>>>    Rear Surround Left  is BACK_LEFT
>>>>>>    Rear Surround Right is BACK_RIGHT
>>>>>>
>>>>>>    So in your table [4] and [5] should be SIDE, [8] and [9] should be
>>>>>>   BACK.
>>>>>>    [26] can be AV_CHAN_LOW_FREQUENCY_2, we do have that.
>>>>>>
>>>>>>    Yes, Left/Right Surround and Left/Right Side Surround will be the
>>>>>>  same,
>>>>>>    but those are not present in commonly used layouts at the same time.
>>>>>>
>>>>>>    Regards,
>>>>>>    Marton
>>>>>>    _______________________________________________
>>>>>>    ffmpeg-devel mailing list
>>>>>>    ffmpeg-devel@ffmpeg.org
>>>>>>    https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>>>
>>>>>>    To unsubscribe, visit link above, or email
>>>>>>    ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>>>
>>>>>   Source of the table is ISO/IEC 23003-3, Table 74 — bsOutputChannelPos:
>>>>>
>>>>>   0 L left front FL front left
>>>>>   1 R right front FR front right
>>>>>   2 C center front FC front centre
>>>>>   3 LFE low frequency enhancement LFE1 low frequency effects-1
>>>>>   4 Ls left surround LS left surround
>>>>>   5 Rs right surround RS right surround
>>>>>   6 Lc left front center FLc front left centre
>>>>>   7 Rc right front center FRc front right centre
>>>>>   8 Lsr rear surround left BL back left
>>>>>   9 Rsr rear surround right BR back right
>>>>>   10 Cs rear center BC back centre
>>>>>   11 Lsd left surround direct LSd left surround direct
>>>>>   12 Rsd right surround direct RSd right surround direct
>>>>>   13 Lss left side surround SL side left
>>>>>   14 Rss right side surround SR side right
>>>>>   15 Lw left wide front FLw front left wide
>>>>>   16 Rw right wide front FRw front right wide
>>>>>   17 Lv left front vertical height TpFL top front left
>>>>>   18 Rv right front vertical height TpFR top front right
>>>>>   19 Cv center front vertical height TpFC top front centre
>>>>>   20 Lvr left surround vertical height rear TpBL top back left
>>>>>   21 Rvr right surround vertical height rear TpBR top back right
>>>>>   22 Cvr center vertical height rear TpBC top back centre
>>>>>   23 Lvss left vertical height side surround TpSiL top side left
>>>>>   24 Rvss right vertical height side surround TpSiR top side right
>>>>>   25 Ts top center surround TpC top centre
>>>>>   26 LFE2 low frequency enhancement 2 LFE2 low frequency effects-2
>>>>>   27 Lb left front vertical bottom BtFL bottom front left
>>>>>   28 Rb right front vertical bottom BtFR bottom front right
>>>>>   29 Cb center front vertical bottom BtFC bottom front centre
>>>>>   30 Lvs left vertical height surround TpLS top left surround
>>>>>   31 Rvs right vertical height surround TpRS top right surround
>>>>>
>>>>>   Third field is "Loudspeaker position", last field is "Loudspeaker
>>>>>   position according to IEC 100/1706/CDV/IEC 62574 (TC100)", each
>>>>>  prefixed
>>>>>   with an abbreviation.
>>>>>
>>>>>   I've added the source to the table comment in the code.
>>>>>
>>>>>   I've also fixed the SIDE/BACK/LFE2 issue in my github repo I linked
>>>>>   earlier.
>>>>
>>>>   Thanks. Later in the code when you actually use this I can see that you
>>>>   are creating a native layout:
>>>>
>>>>>   +    channel_config_idx = get_bits(gb, 5); /*
>>>>>  channelConfigurationIndex
>>>>>   */
>>>>>   +    if (!channel_config_idx) {
>>>>>   +        /* UsacChannelConfig() */
>>>>>   +        uint8_t channel_pos[64];
>>>>>   +        uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /*
>>>>>   numOutChannels */
>>>>>   +        if (nb_channels >= 64)
>>>>>   +            return AVERROR(EINVAL);
>>>>>   +
>>>>>   +        av_channel_layout_uninit(&ac->oc[1].ch_layout);
>>>>>   +        for (int i = 0; i < nb_channels; i++)
>>>>>   +            channel_pos[i] = get_bits(gb, 5); /* bsOutputChannelPos
>>>>>  */
>>>>>   +
>>>>>   +        ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
>>>>>   +        ac->oc[1].ch_layout.nb_channels = nb_channels;
>>>>>   +        ac->oc[1].ch_layout.u.mask = 0;
>>>>>   +
>>>>>   +        for (int i = 0; i < nb_channels; i++)
>>>>>   +            ac->oc[1].ch_layout.u.mask |= 1 <<
>>>>>   usac_ch_pos_to_av[channel_pos[i]];
>>>>>   +
>>>>>   +        av_channel_layout_copy(&avctx->ch_layout, 
>>>>> & ac->oc[1].ch_layout);
>>>>>   +    } else {
>>>>
>>>>   Probably you should create a custom layout here, because the channels
>>>>  are
>>>>   not necessarily in native order. We already have a relatively simple way
>>>>  to
>>>>   do that and to fall back to native layouts if possible, here is an
>>>>  example
>>>>   copied from mov_chan:
>>>>
>>>>   ret = av_channel_layout_custom_init(ch_layout, channels);
>>>>   if (ret < 0)
>>>>       return ret;
>>>>   for (i = 0; i < channels; i++) {
>>>>        enum AVChannel id = layout_map[i].id;
>>>>        ch_layout->u.map[i].id = (id != AV_CHAN_NONE ? id :
>>>>  AV_CHAN_UNKNOWN);
>>>> }
>>>>   return av_channel_layout_retype(ch_layout, 0,
>>>>   AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
>>>>
>>>>   So you should adapt this accordingly to aac.
>>>>
>>>>   Regards,
>>>>   Marton
>>>>   _______________________________________________
>>>>   ffmpeg-devel mailing list
>>>>   ffmpeg-devel@ffmpeg.org
>>>>   https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>
>>>>   To unsubscribe, visit link above, or email
>>>>   ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>
>>>  According to the spec:
>>>
>>>>   In case of multiple channel elements the index i of
>>>>   bsOutputChannelPos[i] indicates the position in which the channel
>>>>   appears in the bitstream.
>>>
>>>  So the channels will always be in native order, as far as I understand.
>>>
>>
>>  AV_CHANNEL_ORDER_NATIVE expects channels in the order of the channel IDs
>>  in the AVChannel enum. Surely that is not the case here, unless the
>>  decoder reorders the channels. Or what if an output position appears
>>  multiple times?
>>
>>  Regards,
>>  Marton
>>  _______________________________________________
>>  ffmpeg-devel mailing list
>>  ffmpeg-devel@ffmpeg.org
>>  https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>>  To unsubscribe, visit link above, or email
>>  ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
>
> It should be the case here, we shouldn't need reordering as NATIVE just lets 
> you specify what order the elements appear in the bitstream.

I don't get it.

bsOutputChannelPos[0] = 0
bsOutputChannelPos[1] = 1

will map to the same AVChannelLayout layout as

bsOutputChannelPos[0] = 1
bsOutptuChannelPos[1] = 0

which will be AV_CHANNEL_ORDER_NATIVE with a mask of 0x3, which means the 
first channel is LEFT, the second channel is RIGHT.

Regards,
Marton
Hendrik Leppkes May 21, 2024, 9:33 p.m. UTC | #10
On Tue, May 21, 2024 at 9:52 PM Lynne via ffmpeg-devel
<ffmpeg-devel@ffmpeg.org> wrote:
>
>
> It should be the case here, we shouldn't need reordering as NATIVE just
> lets you specify what order the elements appear in the bitstream.

NATIVE means "the FFmpeg native ordering", not "bitstream order".
CUSTOM lets you specify an arbitrary order but requires metadata to
that effect, but it makes it particularly hard to map to any standard
when playing or transcoding, so some efforts to try to unify it into a
NATIVE format is always appreciated if possible.

- Hendrik
Lynne May 21, 2024, 10:09 p.m. UTC | #11
On 21/05/2024 23:33, Hendrik Leppkes wrote:
> On Tue, May 21, 2024 at 9:52 PM Lynne via ffmpeg-devel
> <ffmpeg-devel@ffmpeg.org> wrote:
>>
>>
>> It should be the case here, we shouldn't need reordering as NATIVE just
>> lets you specify what order the elements appear in the bitstream.
> 
> NATIVE means "the FFmpeg native ordering", not "bitstream order".
> CUSTOM lets you specify an arbitrary order but requires metadata to
> that effect, but it makes it particularly hard to map to any standard
> when playing or transcoding, so some efforts to try to unify it into a
> NATIVE format is always appreciated if possible.

Right, I forgot about that, thanks.
Amended in my git repo to use Marton's code.
Marton Balint May 22, 2024, 8:15 p.m. UTC | #12
On Wed, 22 May 2024, Lynne via ffmpeg-devel wrote:

> On 21/05/2024 23:33, Hendrik Leppkes wrote:
>>  On Tue, May 21, 2024 at 9:52 PM Lynne via ffmpeg-devel
>>  <ffmpeg-devel@ffmpeg.org> wrote:
>>> 
>>>
>>>  It should be the case here, we shouldn't need reordering as NATIVE just
>>>  lets you specify what order the elements appear in the bitstream.
>>
>>  NATIVE means "the FFmpeg native ordering", not "bitstream order".
>>  CUSTOM lets you specify an arbitrary order but requires metadata to
>>  that effect, but it makes it particularly hard to map to any standard
>>  when playing or transcoding, so some efforts to try to unify it into a
>>  NATIVE format is always appreciated if possible.
>
> Right, I forgot about that, thanks.
> Amended in my git repo to use Marton's code.
>
>

> ret = av_channel_layout_custom_init(&ac->oc[1].ch_layout, nb_channels);
> if (ret < 0)
>     return ret;
> 
> for (int i = 0; i < nb_channels; i++) {
>     AVChannelCustom *cm = &ac->oc[1].ch_layout.u.map[i];
>     cm->id = usac_ch_pos_to_av[get_bits(gb, 5)]; /* bsOutputChannelPos */
>     if (cm->id)
>         cm->id = AV_CHAN_UNKNOWN;

if (cm->id == AV_CHAN_NONE)
     cm->id = AV_CHAN_UNKNOWN;

> }
> 
> ret = av_channel_layout_retype(&ac->oc[1].ch_layout,
>                                AV_CHANNEL_ORDER_NATIVE,
>                                AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);

You can simply pass 0 instead of AV_CHANNEL_ORDER_NATIVE as the order 
parameter, because AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL automatically 
uses the canonical order and ignores the order parameter.

> if (ret < 0)
>     return ret;
> 
> av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);

Missing error check.

Thanks,
Marton
Lynne May 22, 2024, 8:25 p.m. UTC | #13
On 22/05/2024 22:15, Marton Balint wrote:
> 
> 
> On Wed, 22 May 2024, Lynne via ffmpeg-devel wrote:
> 
>> On 21/05/2024 23:33, Hendrik Leppkes wrote:
>>>  On Tue, May 21, 2024 at 9:52 PM Lynne via ffmpeg-devel
>>>  <ffmpeg-devel@ffmpeg.org> wrote:
>>>>
>>>>
>>>>  It should be the case here, we shouldn't need reordering as NATIVE 
>>>> just
>>>>  lets you specify what order the elements appear in the bitstream.
>>>
>>>  NATIVE means "the FFmpeg native ordering", not "bitstream order".
>>>  CUSTOM lets you specify an arbitrary order but requires metadata to
>>>  that effect, but it makes it particularly hard to map to any standard
>>>  when playing or transcoding, so some efforts to try to unify it into a
>>>  NATIVE format is always appreciated if possible.
>>
>> Right, I forgot about that, thanks.
>> Amended in my git repo to use Marton's code.
>>
>>
> 
>> ret = av_channel_layout_custom_init(&ac->oc[1].ch_layout, nb_channels);
>> if (ret < 0)
>>     return ret;
>>
>> for (int i = 0; i < nb_channels; i++) {
>>     AVChannelCustom *cm = &ac->oc[1].ch_layout.u.map[i];
>>     cm->id = usac_ch_pos_to_av[get_bits(gb, 5)]; /* bsOutputChannelPos */
>>     if (cm->id)
>>         cm->id = AV_CHAN_UNKNOWN;
> 
> if (cm->id == AV_CHAN_NONE)
>      cm->id = AV_CHAN_UNKNOWN;
> 
>> }
>>
>> ret = av_channel_layout_retype(&ac->oc[1].ch_layout,
>>                                AV_CHANNEL_ORDER_NATIVE,
>>                                AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
> 
> You can simply pass 0 instead of AV_CHANNEL_ORDER_NATIVE as the order 
> parameter, because AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL automatically 
> uses the canonical order and ignores the order parameter.
> 
>> if (ret < 0)
>>     return ret;
>>
>> av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
> 
> Missing error check.
> 
> Thanks,
> Marton
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Fixed both, thanks.
I'll keep AV_CHANNEL_ORDER_NATIVE as-is because it's just more readable.
diff mbox series

Patch

diff --git a/libavcodec/aac/Makefile b/libavcodec/aac/Makefile
index c3e525d373..70b1dca274 100644
--- a/libavcodec/aac/Makefile
+++ b/libavcodec/aac/Makefile
@@ -2,6 +2,7 @@  clean::
 		$(RM) $(CLEANSUFFIXES:%=libavcodec/aac/%)
 
 OBJS-$(CONFIG_AAC_DECODER)          +=  aac/aacdec.o aac/aacdec_tab.o \
-                                        aac/aacdec_float.o
+                                        aac/aacdec_float.o aac/aacdec_usac.o \
+                                        aac/aacdec_ac.o aac/aacdec_lpd.o
 OBJS-$(CONFIG_AAC_FIXED_DECODER)    +=  aac/aacdec.o aac/aacdec_tab.o \
                                         aac/aacdec_fixed.o
diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 6f37ac5361..2b8322fc68 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -40,6 +40,7 @@ 
 
 #include "aacdec.h"
 #include "aacdec_tab.h"
+#include "aacdec_usac.h"
 
 #include "libavcodec/aac.h"
 #include "libavcodec/aac_defines.h"
@@ -535,6 +536,8 @@  static av_cold void flush(AVCodecContext *avctx)
             }
         }
     }
+
+    ff_aac_usac_reset_state(ac, &ac->oc[1]);
 }
 
 /**
@@ -993,13 +996,14 @@  static int decode_eld_specific_config(AACDecContext *ac, AVCodecContext *avctx,
  */
 static int decode_audio_specific_config_gb(AACDecContext *ac,
                                            AVCodecContext *avctx,
-                                           MPEG4AudioConfig *m4ac,
+                                           OutputConfiguration *oc,
                                            GetBitContext *gb,
                                            int get_bit_alignment,
                                            int sync_extension)
 {
     int i, ret;
     GetBitContext gbc = *gb;
+    MPEG4AudioConfig *m4ac = &oc->m4ac;
     MPEG4AudioConfig m4ac_bak = *m4ac;
 
     if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) {
@@ -1033,14 +1037,22 @@  static int decode_audio_specific_config_gb(AACDecContext *ac,
     case AOT_ER_AAC_LC:
     case AOT_ER_AAC_LD:
         if ((ret = decode_ga_specific_config(ac, avctx, gb, get_bit_alignment,
-                                            m4ac, m4ac->chan_config)) < 0)
+                                             &oc->m4ac, m4ac->chan_config)) < 0)
             return ret;
         break;
     case AOT_ER_AAC_ELD:
         if ((ret = decode_eld_specific_config(ac, avctx, gb,
-                                              m4ac, m4ac->chan_config)) < 0)
+                                              &oc->m4ac, m4ac->chan_config)) < 0)
+            return ret;
+        break;
+#if CONFIG_AAC_DECODER
+    case AOT_USAC_NOSBR: /* fallthrough */
+    case AOT_USAC:
+        if ((ret = ff_aac_usac_config_decode(ac, avctx, gb,
+                                             oc, m4ac->chan_config)) < 0)
             return ret;
         break;
+#endif
     default:
         avpriv_report_missing_feature(avctx,
                                       "Audio object type %s%d",
@@ -1060,7 +1072,7 @@  static int decode_audio_specific_config_gb(AACDecContext *ac,
 
 static int decode_audio_specific_config(AACDecContext *ac,
                                         AVCodecContext *avctx,
-                                        MPEG4AudioConfig *m4ac,
+                                        OutputConfiguration *oc,
                                         const uint8_t *data, int64_t bit_size,
                                         int sync_extension)
 {
@@ -1080,7 +1092,7 @@  static int decode_audio_specific_config(AACDecContext *ac,
     if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
         return ret;
 
-    return decode_audio_specific_config_gb(ac, avctx, m4ac, &gb, 0,
+    return decode_audio_specific_config_gb(ac, avctx, oc, &gb, 0,
                                            sync_extension);
 }
 
@@ -1104,6 +1116,15 @@  static av_cold int decode_close(AVCodecContext *avctx)
 {
     AACDecContext *ac = avctx->priv_data;
 
+    for (int i = 0; i < 2; i++) {
+        OutputConfiguration *oc = &ac->oc[i];
+        AACUSACConfig *usac = &oc->usac;
+        for (int j = 0; j < usac->nb_elems; j++) {
+            AACUsacElemConfig *ec = &usac->elems[i];
+            av_freep(&ec->ext.pl_data);
+        }
+    }
+
     for (int type = 0; type < FF_ARRAY_ELEMS(ac->che); type++) {
         for (int i = 0; i < MAX_ELEM_ID; i++) {
             if (ac->che[type][i]) {
@@ -1181,7 +1202,7 @@  av_cold int ff_aac_decode_init(AVCodecContext *avctx)
     ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
 
     if (avctx->extradata_size > 0) {
-        if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
+        if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1],
                                                 avctx->extradata,
                                                 avctx->extradata_size * 8LL,
                                                 1)) < 0)
@@ -1549,9 +1570,16 @@  static int decode_pulses(Pulse *pulse, GetBitContext *gb,
 int ff_aac_decode_tns(AACDecContext *ac, TemporalNoiseShaping *tns,
                       GetBitContext *gb, const IndividualChannelStream *ics)
 {
+    int tns_max_order = INT32_MAX;
+    const int is_usac = ac->oc[1].m4ac.object_type == AOT_USAC ||
+                        ac->oc[1].m4ac.object_type == AOT_USAC_NOSBR;
     int w, filt, i, coef_len, coef_res, coef_compress;
     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
-    const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
+
+    /* USAC doesn't seem to have a limit */
+    if (!is_usac)
+        tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
+
     for (w = 0; w < ics->num_windows; w++) {
         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
             coef_res = get_bits1(gb);
@@ -1560,7 +1588,12 @@  int ff_aac_decode_tns(AACDecContext *ac, TemporalNoiseShaping *tns,
                 int tmp2_idx;
                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
 
-                if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
+                if (is_usac)
+                    tns->order[w][filt] = get_bits(gb, 4 - is8);
+                else
+                    tns->order[w][filt] = get_bits(gb, 5 - (2 * is8));
+
+                if (tns->order[w][filt] > tns_max_order) {
                     av_log(ac->avctx, AV_LOG_ERROR,
                            "TNS filter order %d is greater than maximum %d.\n",
                            tns->order[w][filt], tns_max_order);
@@ -1598,6 +1631,7 @@  static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
 {
     int idx;
     int max_idx = cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
+    cpe->max_sfb_ste = cpe->ch[0].ics.max_sfb;
     if (ms_present == 1) {
         for (idx = 0; idx < max_idx; idx++)
             cpe->ms_mask[idx] = get_bits1(gb);
@@ -2182,42 +2216,19 @@  static int aac_decode_er_frame(AVCodecContext *avctx, AVFrame *frame,
     return 0;
 }
 
-static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
-                                int *got_frame_ptr, GetBitContext *gb,
-                                const AVPacket *avpkt)
+static int decode_frame_ga(AVCodecContext *avctx, AACDecContext *ac,
+                           GetBitContext *gb, int *got_frame_ptr)
 {
-    AACDecContext *ac = avctx->priv_data;
-    ChannelElement *che = NULL, *che_prev = NULL;
+    int err;
+    int is_dmono;
+    int elem_id;
     enum RawDataBlockType elem_type, che_prev_type = TYPE_END;
-    int err, elem_id;
-    int samples = 0, multiplier, audio_found = 0, pce_found = 0;
-    int is_dmono, sce_count = 0;
-    int payload_alignment;
     uint8_t che_presence[4][MAX_ELEM_ID] = {{0}};
+    ChannelElement *che = NULL, *che_prev = NULL;
+    int samples = 0, multiplier, audio_found = 0, pce_found = 0, sce_count = 0;
+    AVFrame *frame = ac->frame;
 
-    ac->frame = frame;
-
-    if (show_bits(gb, 12) == 0xfff) {
-        if ((err = parse_adts_frame_header(ac, gb)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
-            goto fail;
-        }
-        if (ac->oc[1].m4ac.sampling_index > 12) {
-            av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
-            err = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-    }
-
-    if ((err = frame_configure_elements(avctx)) < 0)
-        goto fail;
-
-    // The AV_PROFILE_AAC_* defines are all object_type - 1
-    // This may lead to an undefined profile being signaled
-    ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
-
-    payload_alignment = get_bits_count(gb);
-    ac->tags_mapped = 0;
+    int payload_alignment = get_bits_count(gb);
     // parse
     while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
         elem_id = get_bits(gb, 4);
@@ -2225,28 +2236,23 @@  static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
         if (avctx->debug & FF_DEBUG_STARTCODE)
             av_log(avctx, AV_LOG_DEBUG, "Elem type:%x id:%x\n", elem_type, elem_id);
 
-        if (!avctx->ch_layout.nb_channels && elem_type != TYPE_PCE) {
-            err = AVERROR_INVALIDDATA;
-            goto fail;
-        }
+        if (!avctx->ch_layout.nb_channels && elem_type != TYPE_PCE)
+            return AVERROR_INVALIDDATA;
 
         if (elem_type < TYPE_DSE) {
             if (che_presence[elem_type][elem_id]) {
                 int error = che_presence[elem_type][elem_id] > 1;
                 av_log(ac->avctx, error ? AV_LOG_ERROR : AV_LOG_DEBUG, "channel element %d.%d duplicate\n",
                        elem_type, elem_id);
-                if (error) {
-                    err = AVERROR_INVALIDDATA;
-                    goto fail;
-                }
+                if (error)
+                    return AVERROR_INVALIDDATA;
             }
             che_presence[elem_type][elem_id]++;
 
             if (!(che=ff_aac_get_che(ac, elem_type, elem_id))) {
                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
                        elem_type, elem_id);
-                err = AVERROR_INVALIDDATA;
-                goto fail;
+                return AVERROR_INVALIDDATA;
             }
             samples = ac->oc[1].m4ac.frame_length_short ? 960 : 1024;
             che->present = 1;
@@ -2283,10 +2289,8 @@  static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
             int tags;
 
             int pushed = push_output_configuration(ac);
-            if (pce_found && !pushed) {
-                err = AVERROR_INVALIDDATA;
-                goto fail;
-            }
+            if (pce_found && !pushed)
+                return AVERROR_INVALIDDATA;
 
             tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb,
                               payload_alignment);
@@ -2312,8 +2316,7 @@  static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
                 elem_id += get_bits(gb, 8) - 1;
             if (get_bits_left(gb) < 8 * elem_id) {
                     av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
-                    err = AVERROR_INVALIDDATA;
-                    goto fail;
+                    return AVERROR_INVALIDDATA;
             }
             err = 0;
             while (elem_id > 0) {
@@ -2337,19 +2340,16 @@  static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
         }
 
         if (err)
-            goto fail;
+            return err;
 
         if (get_bits_left(gb) < 3) {
             av_log(avctx, AV_LOG_ERROR, overread_err);
-            err = AVERROR_INVALIDDATA;
-            goto fail;
+            return AVERROR_INVALIDDATA;
         }
     }
 
-    if (!avctx->ch_layout.nb_channels) {
-        *got_frame_ptr = 0;
+    if (!avctx->ch_layout.nb_channels)
         return 0;
-    }
 
     multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
     samples <<= multiplier;
@@ -2364,16 +2364,17 @@  static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
 
     if (!ac->frame->data[0] && samples) {
         av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
-        err = AVERROR_INVALIDDATA;
-        goto fail;
+        return AVERROR_INVALIDDATA;
     }
 
     if (samples) {
         ac->frame->nb_samples = samples;
         ac->frame->sample_rate = avctx->sample_rate;
-    } else
+        *got_frame_ptr = 1;
+    } else {
         av_frame_unref(ac->frame);
-    *got_frame_ptr = !!samples;
+        *got_frame_ptr = 0;
+    }
 
     /* for dual-mono audio (SCE + SCE) */
     is_dmono = ac->dmono_mode && sce_count == 2 &&
@@ -2387,6 +2388,59 @@  static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
     }
 
     return 0;
+}
+
+static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
+                                int *got_frame_ptr, GetBitContext *gb,
+                                const AVPacket *avpkt)
+{
+    int err;
+    AACDecContext *ac = avctx->priv_data;
+
+    ac->frame = frame;
+    *got_frame_ptr = 0;
+
+    if (show_bits(gb, 12) == 0xfff) {
+        if ((err = parse_adts_frame_header(ac, gb)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
+            goto fail;
+        }
+        if (ac->oc[1].m4ac.sampling_index > 12) {
+            av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
+            err = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+    }
+
+    if ((err = frame_configure_elements(avctx)) < 0)
+        goto fail;
+
+    // The AV_PROFILE_AAC_* defines are all object_type - 1
+    // This may lead to an undefined profile being signaled
+    ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
+
+    ac->tags_mapped = 0;
+
+    if ((ac->oc[1].m4ac.object_type == AOT_USAC) ||
+        (ac->oc[1].m4ac.object_type == AOT_USAC_NOSBR)) {
+        if (ac->is_fixed) {
+            avpriv_report_missing_feature(ac->avctx,
+                                          "AAC USAC fixed-point decoding");
+            return AVERROR_PATCHWELCOME;
+        }
+#if CONFIG_AAC_DECODER
+        err = ff_aac_usac_decode_frame(avctx, ac, gb, got_frame_ptr);
+        if (err < 0)
+            goto fail;
+#endif
+    } else {
+        err = decode_frame_ga(avctx, ac, gb, got_frame_ptr);
+        if (err < 0)
+            goto fail;
+    }
+
+    return err;
+
 fail:
     pop_output_configuration(ac);
     return err;
@@ -2414,7 +2468,7 @@  static int aac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
     if (new_extradata) {
         /* discard previous configuration */
         ac->oc[1].status = OC_NONE;
-        err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
+        err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1],
                                            new_extradata,
                                            new_extradata_size * 8LL, 1);
         if (err < 0) {
diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h
index 8d1eb74066..ee21a94007 100644
--- a/libavcodec/aac/aacdec.h
+++ b/libavcodec/aac/aacdec.h
@@ -42,6 +42,8 @@ 
 #include "libavcodec/avcodec.h"
 #include "libavcodec/mpeg4audio.h"
 
+#include "aacdec_ac.h"
+
 typedef struct AACDecContext AACDecContext;
 
 /**
@@ -69,6 +71,32 @@  enum CouplingPoint {
     AFTER_IMDCT = 3,
 };
 
+/* usacElementType — syntax element types in a USAC frame */
+enum AACUsacElem {
+    ID_USAC_SCE = 0,
+    ID_USAC_CPE = 1,
+    ID_USAC_LFE = 2,
+    ID_USAC_EXT = 3,
+};
+
+/* UsacConfigExtType — configuration extension payload types */
+enum ExtensionHeaderType {
+    ID_CONFIG_EXT_FILL = 0,
+    ID_CONFIG_EXT_LOUDNESS_INFO = 2,
+    ID_CONFIG_EXT_STREAM_ID = 7,
+};
+
+/* usacExtElementType — per-frame extension element payload types */
+enum AACUsacExtension {
+    ID_EXT_ELE_FILL,
+    ID_EXT_ELE_MPEGS,
+    ID_EXT_ELE_SAOC,
+    ID_EXT_ELE_AUDIOPREROLL,
+    ID_EXT_ELE_UNI_DRC,
+};
+
+/* loudnessInfoSetExtType — loudness info set extension types */
+enum AACUSACLoudnessExt {
+    UNIDRCLOUDEXT_TERM = 0x0,
+    UNIDRCLOUDEXT_EQ = 0x1,
+};
+
 // Supposed to be equal to AAC_RENAME() in case of USE_FIXED.
 #define RENAME_FIXED(name) name ## _fixed
 
@@ -93,6 +121,40 @@  typedef struct LongTermPrediction {
     int8_t used[MAX_LTP_LONG_SFB];
 } LongTermPrediction;
 
+/* Per channel core mode */
+typedef struct AACUsacElemData {
+    uint8_t core_mode;
+    uint8_t scale_factor_grouping;
+
+    /* Timewarping ratio */
+#define NUM_TW_NODES 16
+    uint8_t tw_ratio[NUM_TW_NODES];
+
+    /* Linear-prediction-domain (LPD) coding side data.
+     * NOTE(review): member is spelled "ldp" while the rest of the code and
+     * the new files use "lpd" — consider renaming for consistency. */
+    struct {
+        uint8_t acelp_core_mode : 3;
+        uint8_t lpd_mode : 5;
+
+        uint8_t bpf_control_info : 1;
+        uint8_t core_mode_last : 1;
+        uint8_t fac_data_present : 1;
+
+        int last_lpd_mode;  /* stateful: carried across frames, -1 = none */
+    } ldp;
+
+    /* Noise filling parameters */
+    struct {
+        unsigned int seed;
+        uint8_t level : 3;
+        uint8_t offset : 5;
+    } noise;
+
+    /* Forward aliasing cancellation (FAC) data */
+    struct {
+        uint8_t gain;
+        uint32_t kv[8 /* (1024 / 16) / 8 */][8];
+    } fac;
+
+    AACArithState ac;  /* spectral arithmetic-coder context state */
+} AACUsacElemData;
+
 /**
  * Individual Channel Stream
  */
@@ -145,11 +207,13 @@  typedef struct ChannelCoupling {
  */
 typedef struct SingleChannelElement {
     IndividualChannelStream ics;
+    AACUsacElemData ue;                             ///< USAC element data
     TemporalNoiseShaping tns;
     enum BandType band_type[128];                   ///< band types
     int sfo[128];                                   ///< scalefactor offsets
     INTFLOAT_UNION(sf, [128]);                      ///< scalefactors (8 windows * 16 sfb max)
     INTFLOAT_ALIGNED_UNION(32, coeffs,    1024);    ///< coefficients for IMDCT, maybe processed
+    INTFLOAT_ALIGNED_UNION(32, prev_coeffs, 1024);  ///< unscaled previous contents of coeffs[] for USAC
     INTFLOAT_ALIGNED_UNION(32, saved,     1536);    ///< overlap
     INTFLOAT_ALIGNED_UNION(32, ret_buf,   2048);    ///< PCM output buffer
     INTFLOAT_ALIGNED_UNION(16, ltp_state, 3072);    ///< time signal for LTP
@@ -163,25 +227,148 @@  typedef struct SingleChannelElement {
     };
 } SingleChannelElement;
 
+/* Per-CPE USAC stereo state: M/S mask mode and complex stereo
+ * prediction coefficients/downmix buffers (embedded in ChannelElement). */
+typedef struct AACUsacStereo {
+    uint8_t common_window;
+    uint8_t common_tw;
+
+    uint8_t ms_mask_mode;
+    uint8_t config_idx;
+
+    /* Complex prediction */
+    uint8_t use_prev_frame;
+    uint8_t pred_dir;
+    uint8_t complex_coef;
+
+    uint8_t pred_used[128];
+
+    INTFLOAT_ALIGNED_UNION(32, alpha_q_re, 1024);
+    INTFLOAT_ALIGNED_UNION(32, alpha_q_im, 1024);
+    INTFLOAT_ALIGNED_UNION(32, prev_alpha_q_re, 1024);
+    INTFLOAT_ALIGNED_UNION(32, prev_alpha_q_im, 1024);
+
+    INTFLOAT_ALIGNED_UNION(32, dmix_re, 1024);
+    INTFLOAT_ALIGNED_UNION(32, prev_dmix_re, 1024); /* Recalculated on every frame */
+    INTFLOAT_ALIGNED_UNION(32, dmix_im, 1024); /* Final prediction data */
+} AACUsacStereo;
+} AACUsacStereo;
+
 /**
  * channel element - generic struct for SCE/CPE/CCE/LFE
  */
 typedef struct ChannelElement {
     int present;
     // CPE specific
+    uint8_t max_sfb_ste;      ///< (USAC) Maximum of both max_sfb values
     uint8_t ms_mask[128];     ///< Set if mid/side stereo is used for each scalefactor window band
     // shared
     SingleChannelElement ch[2];
     // CCE specific
     ChannelCoupling coup;
+    // USAC stereo coupling data
+    AACUsacStereo us;
 } ChannelElement;
 
+/* Parsed loudnessInfo() payload — loudness/DRC metadata carried in the
+ * ID_CONFIG_EXT_LOUDNESS_INFO configuration extension.
+ * NOTE(review): field semantics presumably follow ISO/IEC 23003-4 — confirm. */
+typedef struct AACUSACLoudnessInfo {
+    uint8_t drc_set_id : 6;
+    uint8_t downmix_id : 7;
+    struct {
+        uint16_t lvl : 12;
+        uint8_t present : 1;
+    } sample_peak;
+
+    struct {
+        uint16_t lvl : 12;
+        uint8_t measurement : 4;
+        uint8_t reliability : 2;
+        uint8_t present : 1;
+    } true_peak;
+
+    uint8_t nb_measurements : 4;
+    struct {
+        uint8_t method_def : 4;
+        uint8_t method_val;
+        uint8_t measurement : 4;
+        uint8_t reliability : 2;
+    } measurements[16];
+} AACUSACLoudnessInfo;
+
+/* Per-element configuration parsed from UsacDecoderConfig; the inline
+ * comments give the corresponding bitstream field names from the spec. */
+typedef struct AACUsacElemConfig {
+    enum AACUsacElem type;
+
+    uint8_t tw_mdct : 1;
+    uint8_t noise_fill : 1;
+
+    uint8_t stereo_config_index;
+
+    /* SBR configuration (not yet used by the decoder) */
+    struct {
+        int ratio;
+
+        uint8_t harmonic_sbr : 1; /* harmonicSBR */
+        uint8_t bs_intertes : 1; /* bs_interTes */
+        uint8_t bs_pvc : 1; /* bs_pvc */
+
+        struct {
+            uint8_t start_freq; /* dflt_start_freq */
+            uint8_t stop_freq; /* dflt_stop_freq */
+
+            uint8_t freq_scale; /* dflt_freq_scale */
+            uint8_t alter_scale : 1; /* dflt_alter_scale */
+            uint8_t noise_scale; /* dflt_noise_scale */
+
+            uint8_t limiter_bands; /* dflt_limiter_bands */
+            uint8_t limiter_gains; /* dflt_limiter_gains */
+            uint8_t interpol_freq : 1; /* dflt_interpol_freq */
+            uint8_t smoothing_mode : 1; /* dflt_smoothing_mode */
+        } dflt;
+    } sbr;
+
+    /* MPEG Surround (Mps212) configuration */
+    struct {
+        uint8_t freq_res; /* bsFreqRes */
+        uint8_t fixed_gain; /* bsFixedGainDMX */
+        uint8_t temp_shape_config; /* bsTempShapeConfig */
+        uint8_t decorr_config; /* bsDecorrConfig */
+        uint8_t high_rate_mode : 1; /* bsHighRateMode */
+        uint8_t phase_coding : 1; /* bsPhaseCoding */
+
+        uint8_t otts_bands_phase; /* bsOttBandsPhase */
+        uint8_t residual_coding; /* bsResidualCoding */
+        uint8_t residual_bands; /* bsResidualBands */
+        uint8_t pseudo_lr : 1; /* bsPseudoLr */
+        uint8_t env_quant_mode : 1; /* bsEnvQuantMode */
+    } mps;
+
+    /* Extension element payload (owned copy; freed in decode_close()) */
+    struct {
+        enum AACUsacExtension type;
+        uint8_t payload_frag;
+        uint32_t default_len;
+        uint32_t pl_data_offset;
+        uint8_t *pl_data;
+    } ext;
+} AACUsacElemConfig;
+
+/* Stream-wide USAC configuration (UsacConfig), kept per OutputConfiguration. */
+typedef struct AACUSACConfig {
+    uint8_t core_sbr_frame_len_idx; /* coreSbrFrameLengthIndex */
+    uint8_t rate_idx;
+    uint16_t core_frame_len;
+    uint16_t stream_identifier;
+
+    AACUsacElemConfig elems[64];
+    int nb_elems;
+
+    /* Loudness info set (config extension) */
+    struct {
+        uint8_t nb_album;
+        AACUSACLoudnessInfo album_info[64];
+        uint8_t nb_info;
+        AACUSACLoudnessInfo info[64];
+    } loudness;
+} AACUSACConfig;
+
 typedef struct OutputConfiguration {
     MPEG4AudioConfig m4ac;
     uint8_t layout_map[MAX_ELEM_ID*4][3];
     int layout_map_tags;
     AVChannelLayout ch_layout;
     enum OCStatus status;
+    AACUSACConfig usac;
 } OutputConfiguration;
 
 /**
diff --git a/libavcodec/aac/aacdec_ac.c b/libavcodec/aac/aacdec_ac.c
new file mode 100644
index 0000000000..7e5077cd19
--- /dev/null
+++ b/libavcodec/aac/aacdec_ac.c
@@ -0,0 +1,208 @@ 
+/*
+ * AAC definitions and structures
+ * Copyright (c) 2024 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/aactab.h"
+#include "aacdec_ac.h"
+
+/* Prepare the arithmetic-coder context map for a new frame of length N.
+ *
+ * On reset the previous-frame context (state->last) is cleared.  When the
+ * transform length changes between frames, the stored context is resampled
+ * to the new length by nearest-neighbour mapping and the tail is zeroed.
+ * Returns the initial context value (previous frame's first entry shifted
+ * into the upper bits, mirrored in state->state_pre).
+ */
+uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int N)
+{
+    float ratio;
+    if (reset) {
+        memset(state->last, 0, sizeof(state->last));
+        state->last_len = N;
+    } else if (state->last_len != N) {
+        int i;
+        uint8_t last[512 /* 2048 / 4 */];
+        memcpy(last, state->last, sizeof(last));
+
+        /* Stretch/compress the previous context to the new length */
+        ratio = state->last_len / (float)N;
+        for (i = 0; i < N/2; i++) {
+            int k = (int)(i * ratio);
+            state->last[i] = last[k];
+        }
+
+        for (; i < FF_ARRAY_ELEMS(state->last); i++)
+            state->last[i] = 0;
+
+        state->last_len = N;
+    }
+
+    /* Reset the current-frame context history */
+    state->cur[3] = 0;
+    state->cur[2] = 0;
+    state->cur[1] = 0;
+    state->cur[0] = 1;
+
+    state->state_pre = state->last[0] << 12;
+    return state->last[0] << 12;
+}
+
+/* Compute the arithmetic-coder context for coefficient 2-tuple i from the
+ * previous frame's context and the current frame's recent history.
+ * The extra 0x10000 flag marks a run of small values (sum of the last
+ * three context entries < 5) past the first few tuples.
+ *
+ * NOTE(review): the incoming 'c' is overwritten immediately, so the
+ * parameter is effectively unused — confirm the caller relies only on the
+ * return value.
+ * NOTE(review): reads state->last[i + 1]; for N == 1024 and i == N/2 - 1
+ * this indexes entry 512 of a 512-entry array — verify the caller's range
+ * of i, or the array size.
+ */
+uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t c, int i, int N)
+{
+    c = state->state_pre >> 8;
+    c = c + (state->last[i + 1] << 8);
+    c = (c << 4);
+    c += state->cur[1];
+
+    state->state_pre = c;
+
+    if (i > 3 &&
+        ((state->cur[3] + state->cur[2] + state->cur[1]) < 5))
+        return c + 0x10000;
+
+    return c;
+}
+
+/* Map a context value to a cumulative-frequency-table index (pki).
+ * Binary search in ff_aac_ac_hash_m, whose entries pack the context in the
+ * upper 24 bits and the table index in the low 8 bits; if the context is
+ * not present, fall back to the interval lookup table ff_aac_ac_lookup_m. */
+uint32_t ff_aac_ac_get_pk(uint32_t c)
+{
+    int i_min = -1;
+    int i, j;
+    int i_max = FF_ARRAY_ELEMS(ff_aac_ac_lookup_m) - 1;
+    while ((i_max - i_min) > 1) {
+        i = i_min + ((i_max - i_min) / 2);
+        j = ff_aac_ac_hash_m[i];
+        if (c < (j >> 8))
+            i_max = i;
+        else if (c > (j >> 8))
+            i_min = i;
+        else
+            return (j & 0xFF);
+    }
+    return ff_aac_ac_lookup_m[i_max];
+}
+
+/* Fold a decoded 2-tuple (a, b) back into the context state.
+ * The magnitude sum is clamped to 15, shifted into the short history
+ * (cur[1..3]) and stored for the next frame at position idx. */
+void ff_aac_ac_update_context(AACArithState *state, int idx,
+                              uint16_t a, uint16_t b)
+{
+    state->cur[0] = a + b + 1;
+    if (state->cur[0] > 0xF)
+        state->cur[0] = 0xF;
+
+    state->cur[3] = state->cur[2];
+    state->cur[2] = state->cur[1];
+    state->cur[1] = state->cur[0];
+
+    state->last[idx] = state->cur[0];
+}
+
+/* Initialize the arithmetic decoder interval and read the first
+ * 16 bits of the coded value from the bitstream. */
+void ff_aac_ac_init(AACArith *ac, GetBitContext *gb)
+{
+    ac->low = 0;
+    ac->high = UINT16_MAX;
+    ac->val = get_bits(gb, 16);
+}
+
+/* Decode one arithmetic-coded symbol.
+ *
+ * cdf points to a cumulative frequency table with 14-bit probabilities;
+ * cdf_len must be one of the table lengths used by the spec (2, 4, 17, 27),
+ * each handled by a hand-unrolled search below.
+ * Returns the decoded symbol index.
+ */
+uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
+                          const uint16_t *cdf, uint16_t cdf_len)
+{
+    int val = ac->val;
+    int low = ac->low;
+    int high = ac->high;
+
+    int sym;
+    int rng = high - low + 1;
+    int c = ((((int)(val - low + 1)) << 14) - ((int)1));
+
+    const uint16_t *p = cdf - 1;
+
+    /* One for each possible CDF length in the spec */
+    switch (cdf_len) {
+    case 2:
+        if ((p[1] * rng) > c)
+            p += 1;
+        break;
+    case 4:
+        if ((p[2] * rng) > c)
+            p += 2;
+        if ((p[1] * rng) > c)
+            p += 1;
+        break;
+    case 17:
+        /* First check if the current probability is even met at all */
+        if ((p[1] * rng) <= c)
+            break;
+        p += 1;
+        for (int i = 8; i >= 1; i >>= 1)
+            if ((p[i] * rng) > c)
+                p += i;
+        break;
+    case 27:
+        if ((p[16] * rng) > c)
+            p += 16;
+        if ((p[8] * rng) > c)
+            p += 8;
+        if (p != (cdf - 1 + 24))
+            if ((p[4] * rng) > c)
+                p += 4;
+        if ((p[2] * rng) > c)
+            p += 2;
+
+        if (p != (cdf - 1 + 24 + 2))
+            if ((p[1] * rng) > c)
+                p += 1;
+        break;
+    default:
+        /* This should never happen */
+        av_assert2(0);
+    }
+
+    /* Narrow the interval to the decoded symbol */
+    sym = (int)((ptrdiff_t)(p - cdf)) + 1;
+    if (sym)
+        high = low + ((rng * cdf[sym - 1]) >> 14) - 1;
+    low += (rng * cdf[sym]) >> 14;
+
+    /* Renormalize: rescale the interval and pull in fresh bits until
+     * low/high no longer agree on the top bits.
+     * This loop could be done faster. */
+    while (1) {
+        if (high < 32768) {
+            ;
+        } else if (low >= 32768) {
+            val -= 32768;
+            low -= 32768;
+            high -= 32768;
+        } else if (low >= 16384 && high < 49152) {
+            val -= 16384;
+            low -= 16384;
+            high -= 16384;
+        } else {
+            break;
+        }
+        low += low;
+        high += high + 1;
+        val = (val << 1) | get_bits1(gb);
+    }
+
+    ac->low = low;
+    ac->high = high;
+    ac->val = val;
+
+    return sym;
+}
+
+/* Finalize the context map after decoding: mark the tuples from offset up
+ * to N/2 as present-but-minimal (1) and zero everything beyond. */
+void ff_aac_ac_finish(AACArithState *state, int offset, int N)
+{
+    int i;
+
+    for (i = offset; i < N/2; i++)
+        state->last[i] = 1;
+
+    for (; i < FF_ARRAY_ELEMS(state->last); i++)
+        state->last[i] = 0;
+}
diff --git a/libavcodec/aac/aacdec_ac.h b/libavcodec/aac/aacdec_ac.h
new file mode 100644
index 0000000000..ef96bed770
--- /dev/null
+++ b/libavcodec/aac/aacdec_ac.h
@@ -0,0 +1,54 @@ 
+/*
+ * AAC definitions and structures
+ * Copyright (c) 2024 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AACDEC_AC_H
+#define AVCODEC_AACDEC_AC_H
+
+#include "libavcodec/get_bits.h"
+
+typedef struct AACArithState {
+    uint8_t last[512 /* 2048 / 4 */];
+    int last_len;
+    uint8_t cur[4];
+    uint16_t state_pre;
+} AACArithState;
+
+typedef struct AACArith {
+    uint16_t low;
+    uint16_t high;
+    uint16_t val;
+} AACArith;
+
+#define FF_AAC_AC_ESCAPE 16
+
+uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int len);
+uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t old_c, int idx, int len);
+uint32_t ff_aac_ac_get_pk(uint32_t c);
+
+void ff_aac_ac_update_context(AACArithState *state, int idx, uint16_t a, uint16_t b);
+void ff_aac_ac_init(AACArith *ac, GetBitContext *gb);
+
+uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
+                          const uint16_t *cdf, uint16_t cdf_len);
+
+void ff_aac_ac_finish(AACArithState *state, int offset, int nb);
+
+#endif /* AVCODEC_AACDEC_AC_H */
diff --git a/libavcodec/aac/aacdec_dsp_template.c b/libavcodec/aac/aacdec_dsp_template.c
index 59a69d88f3..8d31af22f8 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -88,8 +88,8 @@  static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, ChannelElement
     INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
     const uint16_t *offsets = ics->swb_offset;
     for (int g = 0; g < ics->num_window_groups; g++) {
-        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
-            const int idx = g*ics->max_sfb + sfb;
+        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+            const int idx = g*cpe->max_sfb_ste + sfb;
             if (cpe->ms_mask[idx] &&
                 cpe->ch[0].band_type[idx] < NOISE_BT &&
                 cpe->ch[1].band_type[idx] < NOISE_BT) {
diff --git a/libavcodec/aac/aacdec_latm.h b/libavcodec/aac/aacdec_latm.h
index e40a2fe1a7..047c11e0fb 100644
--- a/libavcodec/aac/aacdec_latm.h
+++ b/libavcodec/aac/aacdec_latm.h
@@ -56,7 +56,8 @@  static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
 {
     AACDecContext *ac     = &latmctx->aac_ctx;
     AVCodecContext *avctx = ac->avctx;
-    MPEG4AudioConfig m4ac = { 0 };
+    OutputConfiguration oc = { 0 };
+    MPEG4AudioConfig *m4ac = &oc.m4ac;
     GetBitContext gbc;
     int config_start_bit  = get_bits_count(gb);
     int sync_extension    = 0;
@@ -76,7 +77,7 @@  static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
     if (get_bits_left(gb) <= 0)
         return AVERROR_INVALIDDATA;
 
-    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
+    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &oc,
                                                     &gbc, config_start_bit,
                                                     sync_extension);
 
@@ -88,11 +89,12 @@  static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
       asclen = bits_consumed;
 
     if (!latmctx->initialized ||
-        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
-        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
+        ac->oc[1].m4ac.sample_rate != m4ac->sample_rate ||
+        ac->oc[1].m4ac.chan_config != m4ac->chan_config) {
 
         if (latmctx->initialized) {
-            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
+            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n",
+                   m4ac->sample_rate, m4ac->chan_config);
         } else {
             av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
         }
@@ -280,7 +282,7 @@  static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
         } else {
             push_output_configuration(&latmctx->aac_ctx);
             if ((err = decode_audio_specific_config(
-                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
+                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1],
                     avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
                 pop_output_configuration(&latmctx->aac_ctx);
                 return err;
diff --git a/libavcodec/aac/aacdec_lpd.c b/libavcodec/aac/aacdec_lpd.c
new file mode 100644
index 0000000000..796edd2ab5
--- /dev/null
+++ b/libavcodec/aac/aacdec_lpd.c
@@ -0,0 +1,198 @@ 
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacdec_lpd.h"
+#include "aacdec_usac.h"
+#include "libavcodec/unary.h"
+
+/* Maps lpd_mode to the coding mode mod[] of each of the four LPD
+ * subframes: 0 = ACELP, 1/2/3 = TCX spanning 1/2/4 subframes.
+ * NOTE(review): non-static, but no extern declaration is visible in
+ * aacdec_lpd.h — confirm it is declared wherever other files use it. */
+const uint8_t ff_aac_lpd_mode_tab[32][4] = {
+    { 0, 0, 0, 0 },
+    { 1, 0, 0, 0 },
+    { 0, 1, 0, 0 },
+    { 1, 1, 0, 0 },
+    { 0, 0, 1, 0 },
+    { 1, 0, 1, 0 },
+    { 0, 1, 1, 0 },
+    { 1, 1, 1, 0 },
+    { 0, 0, 0, 1 },
+    { 1, 0, 0, 1 },
+    { 0, 1, 0, 1 },
+    { 1, 1, 0, 1 },
+    { 0, 0, 1, 1 },
+    { 1, 0, 1, 1 },
+    { 0, 1, 1, 1 },
+    { 1, 1, 1, 1 },
+    { 2, 2, 0, 0 },
+    { 2, 2, 1, 0 },
+    { 2, 2, 0, 1 },
+    { 2, 2, 1, 1 },
+    { 0, 0, 2, 2 },
+    { 1, 0, 2, 2 },
+    { 0, 1, 2, 2 },
+    { 1, 1, 2, 2 },
+    { 2, 2, 2, 2 },
+    { 3, 3, 3, 3 },
+    /* Larger values are reserved, but permit them for resilience */
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+    { 0, 0, 0, 0 },
+};
+
+/* Parse the algebraic-VQ codebook numbers qn[] for no_qn sub-vectors.
+ *
+ * nk_mode selects between the coding variants: unary-only (1),
+ * 2-bit base plus unary extension (2), and 2-bit base plus a remapped
+ * unary extension (otherwise).
+ * (Fixed: stray double semicolons after the get_unary() calls.)
+ */
+static void parse_qn(GetBitContext *gb, int *qn, int nk_mode, int no_qn)
+{
+    if (nk_mode == 1) {
+        for (int k = 0; k < no_qn; k++) {
+            qn[k] = get_unary(gb, 0, INT32_MAX); // TODO: find proper ranges
+            if (qn[k])
+                qn[k]++;
+        }
+        return;
+    }
+
+    for (int k = 0; k < no_qn; k++)
+        qn[k] = get_bits(gb, 2) + 2;
+
+    if (nk_mode == 2) {
+        for (int k = 0; k < no_qn; k++) {
+            if (qn[k] > 4) {
+                qn[k] = get_unary(gb, 0, INT32_MAX); // TODO: find proper ranges
+                if (qn[k])
+                    qn[k] += 4;
+            }
+        }
+        return;
+    }
+
+    for (int k = 0; k < no_qn; k++) {
+        if (qn[k] > 4) {
+            int qn_ext = get_unary(gb, 0, INT32_MAX);
+            switch (qn_ext) {
+            case 0: qn[k] = 5; break;
+            case 1: qn[k] = 6; break;
+            case 2: qn[k] = 0; break;
+            default: qn[k] = qn_ext + 4; break;
+            }
+        }
+    }
+}
+
+/* Parse one algebraic-VQ (RE8) codebook entry: base codebook index plus
+ * optional Voronoi extension indices kv[].
+ *
+ * Fixed: n/nk could be read uninitialized when no_qn == 0 (compiler
+ * warning); they are now zero-initialized so nothing is read in that case.
+ */
+static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv,
+                              int nk_mode, int no_qn)
+{
+    int idx, n = 0, nk = 0;
+
+    int qn[2];
+    parse_qn(gb, qn, nk_mode, no_qn);
+
+    for (int k = 0; k < no_qn; k++) {
+        if (qn[k] > 4) {
+            nk = (qn[k] - 3) / 2;
+            n = qn[k] - nk*2;
+        } else {
+            nk = 0;
+            n = qn[k];
+        }
+    }
+
+    /* NOTE(review): only the n/nk derived from the *last* qn[] entry are
+     * used, and idx itself is currently unused; reading the index and
+     * Voronoi data per-k inside the loop above may be what the spec
+     * intends — confirm against the AVQ decoding process. */
+    idx = get_bits(gb, 4*n);
+
+    if (nk > 0)
+        for (int i = 0; i < 8; i++)
+            kv[i] = get_bits(gb, nk);
+
+    return 0;
+}
+
+/* Parse forward aliasing cancellation (FAC) data for a transition frame.
+ *
+ * use_gain: if set, a 7-bit explicit FAC gain precedes the coefficients.
+ * len:      FAC length; the coefficients are read as len/8 algebraic-VQ
+ *           codebook entries into ce->fac.kv[].
+ * Returns 0 on success, a negative error code on failure.
+ */
+int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
+                          int use_gain, int len)
+{
+    int ret;
+    if (use_gain)
+        ce->fac.gain = get_bits(gb, 7);
+
+    for (int i = 0; i < len/8; i++) {
+        ret = parse_codebook_idx(gb, ce->fac.kv[i], 1, 1);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/* Parse an lpd_channel_stream() (linear-prediction-domain payload).
+ *
+ * Only the mode/FAC side information is handled so far; the ACELP and TCX
+ * payload parsers are still stubs (see the commented-out calls below).
+ * Fixed: parenthesized the mixed &&/|| condition (-Wparentheses warning;
+ * grouping unchanged) and a comment typo (last_ldp_mode -> last_lpd_mode).
+ */
+int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
+                                    AACUsacElemData *ce, GetBitContext *gb)
+{
+    int k;
+    const uint8_t *mod;
+    int first_ldp_flag;
+    int first_tcx_flag;
+
+    ce->ldp.acelp_core_mode = get_bits(gb, 3);
+    ce->ldp.lpd_mode = get_bits(gb, 5);
+
+    ce->ldp.bpf_control_info = get_bits1(gb);
+    ce->ldp.core_mode_last = get_bits1(gb);
+    ce->ldp.fac_data_present = get_bits1(gb);
+
+    mod = ff_aac_lpd_mode_tab[ce->ldp.lpd_mode];
+
+    first_ldp_flag = !ce->ldp.core_mode_last;
+    first_tcx_flag = 1;
+    if (first_ldp_flag)
+        ce->ldp.last_lpd_mode = -1; /* last_lpd_mode is a **STATEFUL** value */
+
+    /* NOTE(review): "k < 0" means this loop never executes; it reads like a
+     * placeholder for "k < 4" (one pass per LPD subframe) kept disabled
+     * until parse_acelp_coding()/parse_tcx_coding() are implemented —
+     * confirm intent. first_tcx_flag is set but never read for the same
+     * reason. */
+    k = 0;
+    while (k < 0) {
+        if (!k) {
+            if (ce->ldp.core_mode_last && ce->ldp.fac_data_present)
+                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
+        } else {
+            /* FAC data is present on every ACELP <-> TCX transition */
+            if ((!ce->ldp.last_lpd_mode && mod[k] > 0) ||
+                (ce->ldp.last_lpd_mode && !mod[k]))
+                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
+        }
+        if (!mod[k]) {
+//            parse_acelp_coding();
+            ce->ldp.last_lpd_mode = 0;
+            k++;
+        } else {
+//            parse_tcx_coding();
+            ce->ldp.last_lpd_mode = mod[k];
+            k += (1 << (mod[k] - 1));
+            first_tcx_flag = 0;
+        }
+    }
+
+//    parse_lpc_data(first_lpd_flag);
+
+    /* Trailing FAC data for an LPD -> FD transition frame */
+    if (!ce->ldp.core_mode_last && ce->ldp.fac_data_present) {
+        uint16_t len_8 = usac->core_frame_len / 8;
+        uint16_t len_16 = usac->core_frame_len / 16;
+        uint16_t fac_len = get_bits1(gb) /* short_fac_flag */ ? len_8 : len_16;
+        int ret = ff_aac_parse_fac_data(ce, gb, 1, fac_len);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/aac/aacdec_lpd.h b/libavcodec/aac/aacdec_lpd.h
new file mode 100644
index 0000000000..924ff75e52
--- /dev/null
+++ b/libavcodec/aac/aacdec_lpd.h
@@ -0,0 +1,33 @@ 
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_LPD_H
+#define AVCODEC_AAC_AACDEC_LPD_H
+
+#include "aacdec.h"
+#include "libavcodec/get_bits.h"
+
+/* Parse a fac_data() block of 'len' entries; 'use_gain' signals that a
+ * gain field precedes the data (long FAC at the frame boundary). */
+int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
+                          int use_gain, int len);
+
+/* Parse an lpd_channel_stream() (ACELP/TCX speech-coding path). */
+int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
+                                    AACUsacElemData *ce, GetBitContext *gb);
+
+#endif /* AVCODEC_AAC_AACDEC_LPD_H */
diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c
new file mode 100644
index 0000000000..faf85040ee
--- /dev/null
+++ b/libavcodec/aac/aacdec_usac.c
@@ -0,0 +1,1587 @@ 
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacdec_usac.h"
+#include "aacdec_tab.h"
+#include "aacdec_lpd.h"
+#include "aacdec_ac.h"
+
+#include "libavcodec/opusdsp.h"
+#include "libavcodec/aactab.h"
+#include "libavutil/mem.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/unary.h"
+
+/* Number of scalefactor bands per complex prediction band, equal to 2. */
+#define SFB_PER_PRED_BAND 2
+
+/* escapedValue() as defined by USAC: read nb1 bits; if the field is
+ * all-ones, extend it with nb2 bits, and with nb3 more bits if the
+ * second field is also all-ones. */
+static inline uint32_t get_escaped_value(GetBitContext *gb, int nb1, int nb2, int nb3)
+{
+    uint32_t value = get_bits(gb, nb1);
+    if (value == (1U << nb1) - 1) {
+        uint32_t ext = get_bits(gb, nb2);
+        value += ext;
+        if (ext == (1U << nb2) - 1)
+            value += get_bits(gb, nb3);
+    }
+
+    return value;
+}
+
+/* Finish later */
+static const enum AVChannel usac_ch_pos_to_av[64] = {
+    [0] = AV_CHAN_FRONT_LEFT,
+    [1] = AV_CHAN_FRONT_RIGHT,
+    [2] = AV_CHAN_FRONT_CENTER,
+    [3] = AV_CHAN_LOW_FREQUENCY,
+    [4] = AV_CHAN_BACK_LEFT, // unsure
+    [5] = AV_CHAN_BACK_RIGHT, // unsure
+    [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
+    [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
+    [8] = 0, /* rear surround left is missing */
+    [9] = 0, /* rear surround right is missing */
+    [10] = AV_CHAN_BACK_CENTER,
+    [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
+    [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
+    [13] = AV_CHAN_SIDE_LEFT, // fairly sure
+    [14] = AV_CHAN_SIDE_RIGHT, // fairly sure
+    [15] = AV_CHAN_WIDE_LEFT, // somewhat confident
+    [16] = AV_CHAN_WIDE_RIGHT, // somewhat confident
+    [17] = AV_CHAN_TOP_FRONT_LEFT,
+    [18] = AV_CHAN_TOP_FRONT_RIGHT,
+    [19] = AV_CHAN_TOP_FRONT_CENTER,
+    [20] = AV_CHAN_TOP_BACK_LEFT,
+    [21] = AV_CHAN_TOP_BACK_RIGHT,
+    [22] = AV_CHAN_TOP_BACK_CENTER,
+    [23] = AV_CHAN_TOP_SIDE_LEFT,
+    [24] = AV_CHAN_TOP_SIDE_RIGHT,
+    [25] = AV_CHAN_TOP_CENTER,
+    [26] = AV_CHAN_LOW_FREQUENCY, // actually LFE2
+    [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
+    [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
+    [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
+    [30] = 0, /* top left surround is missing */
+    [31] = 0, /* top right surround is missing */
+};
+
+/* loudnessInfo() — loudness/peak metadata for one DRC set. */
+static int decode_loudness_info(AACDecContext *ac, AACUSACLoudnessInfo *info,
+                                GetBitContext *gb)
+{
+    info->drc_set_id = get_bits(gb, 6);
+    info->downmix_id = get_bits(gb, 7);
+
+    if ((info->sample_peak.present = get_bits1(gb))) /* samplePeakLevelPresent */
+        info->sample_peak.lvl = get_bits(gb, 12);
+
+    if ((info->true_peak.present = get_bits1(gb))) { /* truePeakLevelPresent */
+        info->true_peak.lvl = get_bits(gb, 12);
+        info->true_peak.measurement = get_bits(gb, 4);
+        info->true_peak.reliability = get_bits(gb, 2);
+    }
+
+    /* NOTE(review): nb_measurements may be up to 15 — confirm
+     * info->measurements[] holds at least that many entries. */
+    info->nb_measurements = get_bits(gb, 4);
+    for (int i = 0; i < info->nb_measurements; i++) {
+        info->measurements[i].method_def = get_bits(gb, 4);
+        info->measurements[i].method_val = get_unary(gb, 0, 8);
+        info->measurements[i].measurement = get_bits(gb, 4);
+        info->measurements[i].reliability = get_bits(gb, 2);
+    }
+
+    return 0;
+}
+
+/* loudnessInfoSet() — per-album and per-stream loudness metadata, plus
+ * optional extensions (only skipping of unknown extensions is supported).
+ * NOTE(review): nb_album/nb_info may each be up to 63 — confirm the
+ * album_info[]/info[] arrays hold at least that many entries. */
+static int decode_loudness_set(AACDecContext *ac, AACUSACConfig *usac,
+                               GetBitContext *gb)
+{
+    int ret;
+
+    usac->loudness.nb_album = get_bits(gb, 6); /* loudnessInfoAlbumCount */
+    usac->loudness.nb_info = get_bits(gb, 6); /* loudnessInfoCount */
+
+    for (int i = 0; i < usac->loudness.nb_album; i++) {
+        ret = decode_loudness_info(ac, &usac->loudness.album_info[i], gb);
+        if (ret < 0)
+            return ret;
+    }
+
+    for (int i = 0; i < usac->loudness.nb_info; i++) {
+        ret = decode_loudness_info(ac, &usac->loudness.info[i], gb);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (get_bits1(gb)) { /* loudnessInfoSetExtPresent */
+        enum AACUSACLoudnessExt type;
+        while ((type = get_bits(gb, 4)) != UNIDRCLOUDEXT_TERM) {
+            uint8_t size_bits = get_bits(gb, 4) + 4;
+            /* size_bits may be up to 19, so the size can exceed 8 bits;
+             * storing it in a uint8_t would truncate it and desync the
+             * extension skipping below. */
+            uint32_t bit_size = get_bits(gb, size_bits) + 1;
+            switch (type) {
+            case UNIDRCLOUDEXT_EQ:
+                avpriv_report_missing_feature(ac->avctx, "loudnessInfoV1");
+                return AVERROR_PATCHWELCOME;
+            default:
+                /* Unknown extension: skip its payload */
+                skip_bits_long(gb, bit_size);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* UsacSbrConfig()/SbrDfltHeader() — SBR tool flags and the default SBR
+ * header parameters (with spec-defined defaults when the extra header
+ * fields are absent). */
+static void decode_usac_sbr_data(AACUsacElemConfig *e, GetBitContext *gb)
+{
+    uint8_t header_extra1;
+    uint8_t header_extra2;
+
+    e->sbr.harmonic_sbr = get_bits1(gb); /* harmonicSBR */
+    e->sbr.bs_intertes = get_bits1(gb); /* bs_interTes */
+    e->sbr.bs_pvc = get_bits1(gb); /* bs_pvc */
+
+    e->sbr.dflt.start_freq = get_bits(gb, 4); /* dflt_start_freq */
+    e->sbr.dflt.stop_freq = get_bits(gb, 4); /* dflt_stop_freq */
+
+    header_extra1 = get_bits1(gb); /* dflt_header_extra1 */
+    header_extra2 = get_bits1(gb); /* dflt_header_extra2 */
+
+    /* Defaults used when the extra1 fields are not transmitted */
+    e->sbr.dflt.freq_scale = 2;
+    e->sbr.dflt.alter_scale = 1;
+    e->sbr.dflt.noise_scale = 2;
+    if (header_extra1) {
+        e->sbr.dflt.freq_scale = get_bits(gb, 2); /* dflt_freq_scale */
+        e->sbr.dflt.alter_scale = get_bits1(gb); /* dflt_alter_scale */
+        e->sbr.dflt.noise_scale = get_bits(gb, 2); /* dflt_noise_scale */
+    }
+
+    /* Defaults used when the extra2 fields are not transmitted */
+    e->sbr.dflt.limiter_bands = 2;
+    e->sbr.dflt.limiter_gains = 2;
+    e->sbr.dflt.interpol_freq = 1;
+    e->sbr.dflt.smoothing_mode = 1;
+    if (header_extra2) {
+        e->sbr.dflt.limiter_bands = get_bits(gb, 2); /* dflt_limiter_bands */
+        e->sbr.dflt.limiter_gains = get_bits(gb, 2); /* dflt_limiter_gains */
+        e->sbr.dflt.interpol_freq = get_bits1(gb); /* dflt_interpol_freq */
+        e->sbr.dflt.smoothing_mode = get_bits1(gb); /* dflt_smoothing_mode */
+    }
+}
+
+/* UsacCoreConfig() — core-coder flags shared by SCE and CPE elements. */
+static void decode_usac_element_core(AACUsacElemConfig *e,
+                                     GetBitContext *gb,
+                                     int sbr_ratio)
+{
+    e->tw_mdct = get_bits1(gb); /* tw_mdct */
+    e->noise_fill = get_bits1(gb);
+    e->sbr.ratio = sbr_ratio; /* derived from coreSbrFrameLengthIndex, not read here */
+}
+
+/* UsacChannelPairElementConfig() — SBR and MPEG Surround (Mps212)
+ * configuration for a channel pair. */
+static void decode_usac_element_pair(AACUsacElemConfig *e, GetBitContext *gb)
+{
+    e->stereo_config_index = 0;
+    if (e->sbr.ratio) {
+        decode_usac_sbr_data(e, gb);
+        e->stereo_config_index = get_bits(gb, 2);
+    }
+    if (e->stereo_config_index) {
+        /* Mps212Config() */
+        e->mps.freq_res = get_bits(gb, 3); /* bsFreqRes */
+        e->mps.fixed_gain = get_bits(gb, 3); /* bsFixedGainDMX */
+        e->mps.temp_shape_config = get_bits(gb, 2); /* bsTempShapeConfig */
+        e->mps.decorr_config = get_bits(gb, 2); /* bsDecorrConfig */
+        e->mps.high_rate_mode = get_bits1(gb); /* bsHighRateMode */
+        e->mps.phase_coding = get_bits1(gb); /* bsPhaseCoding */
+
+        if (get_bits1(gb)) /* bsOttBandsPhasePresent */
+            e->mps.otts_bands_phase = get_bits(gb, 5); /* bsOttBandsPhase */
+
+        /* Derived, not read from the bitstream */
+        e->mps.residual_coding = e->stereo_config_index >= 2; /* bsResidualCoding */
+        if (e->mps.residual_coding) {
+            e->mps.residual_bands = get_bits(gb, 5); /* bsResidualBands */
+            e->mps.pseudo_lr = get_bits1(gb); /* bsPseudoLr */
+        }
+        if (e->mps.temp_shape_config == 2)
+            e->mps.env_quant_mode = get_bits1(gb); /* bsEnvQuantMode */
+    }
+}
+
+/* UsacExtElementConfig() — configuration of one extension element.
+ * Unsupported extension types have their config payload skipped. */
+static int decode_usac_extension(AACDecContext *ac, AACUsacElemConfig *e,
+                                 GetBitContext *gb)
+{
+    int len = 0, ext_config_len;
+
+    e->ext.type = get_escaped_value(gb, 4, 8, 16); /* usacExtElementType */
+    ext_config_len = get_escaped_value(gb, 4, 8, 16); /* usacExtElementConfigLength */
+
+    if (get_bits1(gb)) /* usacExtElementDefaultLengthPresent */
+        len = get_escaped_value(gb, 8, 16, 0) + 1;
+
+    e->ext.default_len = len;
+    e->ext.payload_frag = get_bits1(gb); /* usacExtElementPayloadFrag */
+
+    av_log(ac->avctx, AV_LOG_DEBUG, "Extension present: type %i, len %i\n",
+           e->ext.type, ext_config_len);
+
+    switch (e->ext.type) {
+#if 0 /* Skip unsupported values */
+    case ID_EXT_ELE_MPEGS:
+        break;
+    case ID_EXT_ELE_SAOC:
+        break;
+    case ID_EXT_ELE_UNI_DRC:
+        break;
+#endif
+    case ID_EXT_ELE_FILL:
+        break; /* This is what the spec does */
+    case ID_EXT_ELE_AUDIOPREROLL:
+        /* No configuration needed - fallthrough (len should be 0) */
+    default:
+        /* ext_config_len is an escaped value and may be large, so the
+         * bit count can exceed what plain skip_bits() handles. */
+        skip_bits_long(gb, 8*ext_config_len);
+        break;
+    }
+
+    return 0;
+}
+
+/* Reset per-channel decoder state after a (re)configuration: seeds the
+ * noise-filling RNG of every SCE/CPE channel with its initial value
+ * (0x3039 = 12345 for the first channel, 0x10932 = 67890 for the second). */
+int ff_aac_usac_reset_state(AACDecContext *ac, OutputConfiguration *oc)
+{
+    AACUSACConfig *usac = &oc->usac;
+    int elem_id[3 /* SCE, CPE, LFE */] = { 0, 0, 0 };
+
+    /* Initialize state */
+    for (int i = 0; i < usac->nb_elems; i++) {
+        AACUsacElemConfig *e = &usac->elems[i];
+        if (e->type == ID_USAC_SCE || e->type == ID_USAC_CPE) {
+            ChannelElement *che;
+            enum RawDataBlockType type;
+            int id;
+            /* Per-type element ids, as expected by ff_aac_get_che() */
+            if (e->type == ID_USAC_SCE) {
+                type = TYPE_SCE;
+                id = elem_id[0]++;
+            } else {
+                type = TYPE_CPE;
+                id = elem_id[1]++;
+            }
+
+            che = ff_aac_get_che(ac, type, id);
+            if (che) {
+                che->ch[0].ue.noise.seed = 0x3039;
+                if (e->type == ID_USAC_CPE)
+                    che->ch[1].ue.noise.seed = 0x10932;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* UsacConfig() — top-level USAC decoder configuration: sample rate, core
+ * frame length / SBR ratio, channel configuration, the element list
+ * (SCE/CPE/LFE/EXT) and optional config extensions. */
+int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx,
+                              GetBitContext *gb, OutputConfiguration *oc,
+                              int channel_config)
+{
+    int ret, idx;
+    uint8_t freq_idx;
+    uint8_t channel_config_idx;
+    int nb_elements;
+    int samplerate;
+    int sbr_ratio;
+    MPEG4AudioConfig *m4ac = &oc->m4ac;
+    AACUSACConfig *usac = &oc->usac;
+    int elem_id[3 /* SCE, CPE, LFE */];
+
+    uint8_t layout_map[MAX_ELEM_ID*4][3];
+
+    freq_idx = get_bits(gb, 5); /* usacSamplingFrequencyIndex */
+    if (freq_idx == 0x1f) {
+        samplerate = get_bits(gb, 24); /* usacSamplingFrequency */
+
+        /* Try to match up an index for the custom sample rate.
+         * TODO: not sure if correct */
+        for (idx = 0; idx < /* FF_ARRAY_ELEMS(ff_aac_usac_samplerate) */ 32; idx++) {
+            if (ff_aac_usac_samplerate[idx] >= samplerate)
+                break;
+        }
+        idx = FFMIN(idx, /* FF_ARRAY_ELEMS(ff_aac_usac_samplerate) */ 32 - 1);
+        usac->rate_idx = idx;
+    } else {
+        samplerate = ff_aac_usac_samplerate[freq_idx];
+        if (samplerate < 0) /* reserved/invalid table entries are negative */
+            return AVERROR(EINVAL);
+        usac->rate_idx = freq_idx;
+    }
+
+    m4ac->sample_rate = avctx->sample_rate = samplerate;
+
+    usac->core_sbr_frame_len_idx = get_bits(gb, 3); /* coreSbrFrameLengthIndex */
+    m4ac->frame_length_short = usac->core_sbr_frame_len_idx == 0 ||
+                               usac->core_sbr_frame_len_idx == 2;
+
+    usac->core_frame_len = (usac->core_sbr_frame_len_idx == 0 ||
+                            usac->core_sbr_frame_len_idx == 2) ? 768 : 1024;
+
+    sbr_ratio = usac->core_sbr_frame_len_idx == 2 ? 2 :
+                usac->core_sbr_frame_len_idx == 3 ? 3 :
+                usac->core_sbr_frame_len_idx == 4 ? 1 :
+                0;
+
+    channel_config_idx = get_bits(gb, 5); /* channelConfigurationIndex */
+    if (!channel_config_idx) {
+        /* UsacChannelConfig() */
+        uint8_t channel_pos[64];
+        uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /* numOutChannels */
+        if (nb_channels >= 64)
+            return AVERROR(EINVAL);
+
+        av_channel_layout_uninit(&ac->oc[1].ch_layout);
+        for (int i = 0; i < nb_channels; i++)
+            channel_pos[i] = get_bits(gb, 5); /* bsOutputChannelPos */
+
+        ac->oc[1].ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
+        ac->oc[1].ch_layout.nb_channels = nb_channels;
+        ac->oc[1].ch_layout.u.mask = 0;
+
+        /* The mask is 64 bits wide; AVChannel values above 31 (e.g. the
+         * bottom-front channels) would overflow a plain "1 <<" int shift.
+         * NOTE(review): duplicate or unmapped (0) positions collapse into
+         * one mask bit, leaving the mask inconsistent with nb_channels —
+         * confirm such streams are rejected downstream. */
+        for (int i = 0; i < nb_channels; i++)
+            ac->oc[1].ch_layout.u.mask |= 1ULL << usac_ch_pos_to_av[channel_pos[i]];
+
+        av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
+    } else {
+        if ((ret = ff_aac_set_default_channel_config(ac, avctx, layout_map,
+                                                     &nb_elements, channel_config_idx)))
+            return ret;
+    }
+
+    /* UsacDecoderConfig */
+    elem_id[0] = elem_id[1] = elem_id[2] = 0;
+    usac->nb_elems = get_escaped_value(gb, 4, 8, 16) + 1;
+    /* The escaped value is unbounded; reject anything that does not fit
+     * the elems[] array (also bounds layout_map[] indexing below). */
+    if (usac->nb_elems > FF_ARRAY_ELEMS(usac->elems)) {
+        av_log(avctx, AV_LOG_ERROR, "Too many elements: %i\n", usac->nb_elems);
+        usac->nb_elems = 0;
+        return AVERROR(EINVAL);
+    }
+
+    for (int i = 0; i < usac->nb_elems; i++) {
+        AACUsacElemConfig *e = &usac->elems[i];
+        memset(e, 0, sizeof(*e));
+
+        e->type = get_bits(gb, 2); /* usacElementType */
+        av_log(ac->avctx, AV_LOG_DEBUG, "Element present: idx %i, type %i\n",
+               i, e->type);
+
+        switch (e->type) {
+        case ID_USAC_SCE: /* SCE */
+            /* UsacCoreConfig */
+            decode_usac_element_core(e, gb, sbr_ratio);
+            if (e->sbr.ratio > 0)
+                decode_usac_sbr_data(e, gb);
+            layout_map[i][0] = TYPE_SCE;
+            layout_map[i][1] = i;
+            layout_map[i][2] = AAC_CHANNEL_FRONT;
+            elem_id[0]++;
+
+            break;
+        case ID_USAC_CPE: /* UsacChannelPairElementConf */
+            /* UsacCoreConfig */
+            decode_usac_element_core(e, gb, sbr_ratio);
+            decode_usac_element_pair(e, gb);
+            layout_map[i][0] = TYPE_CPE;
+            layout_map[i][1] = i;
+            layout_map[i][2] = AAC_CHANNEL_FRONT;
+            elem_id[1]++;
+
+            break;
+        case ID_USAC_LFE: /* LFE */
+            elem_id[2]++;
+            avpriv_report_missing_feature(ac->avctx, "AAC USAC LFE");
+            return AVERROR_PATCHWELCOME;
+        case ID_USAC_EXT: /* EXT */
+            ret = decode_usac_extension(ac, e, gb);
+            if (ret < 0)
+                return ret;
+            break;
+        }
+    }
+
+    ret = ff_aac_output_configure(ac, layout_map, elem_id[0] + elem_id[1] + elem_id[2], OC_GLOBAL_HDR, 0);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to parse channel config!\n");
+        return ret;
+    }
+
+    if (get_bits1(gb)) { /* usacConfigExtensionPresent */
+        int invalid;
+        int nb_extensions = get_escaped_value(gb, 2, 4, 8) + 1; /* numConfigExtensions */
+        for (int i = 0; i < nb_extensions; i++) {
+            int type = get_escaped_value(gb, 4, 8, 16);
+            int len = get_escaped_value(gb, 4, 8, 16);
+            switch (type) {
+            case ID_CONFIG_EXT_LOUDNESS_INFO:
+                ret = decode_loudness_set(ac, usac, gb);
+                if (ret < 0)
+                    return ret;
+                break;
+            case ID_CONFIG_EXT_STREAM_ID:
+                usac->stream_identifier = get_bits(gb, 16);
+                break;
+            case ID_CONFIG_EXT_FILL: /* fallthrough */
+                invalid = 0;
+                while (len--) {
+                    if (get_bits(gb, 8) != 0xA5)
+                        invalid++;
+                }
+                if (invalid)
+                    av_log(avctx, AV_LOG_WARNING, "Invalid fill bytes: %i\n",
+                           invalid);
+                break;
+            default:
+                while (len--)
+                    skip_bits(gb, 8);
+                break;
+            }
+        }
+    }
+
+    ret = ff_aac_usac_reset_state(ac, oc);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+/* Decode all scalefactors, delta-coded against the global gain using the
+ * shared AAC scalefactor VLC. */
+static int decode_usac_scale_factors(AACDecContext *ac,
+                                     SingleChannelElement *sce,
+                                     GetBitContext *gb, uint8_t global_gain)
+{
+    IndividualChannelStream *ics = &sce->ics;
+
+    /* Decode all scalefactors. */
+    int offset_sf = global_gain;
+    for (int g = 0; g < ics->num_window_groups; g++) {
+        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
+            /* First coefficient is just the global gain */
+            if (!g && !sfb) {
+                /* The canonical representation of quantized scalefactors
+                 * in the spec is with 100 subtracted. */
+                sce->sfo[0] = offset_sf - 100;
+                continue;
+            }
+
+            offset_sf += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO;
+            /* The unsigned comparison also rejects offsets that went negative */
+            if (offset_sf > 255U) {
+                av_log(ac->avctx, AV_LOG_ERROR,
+                       "Scalefactor (%d) out of range.\n", offset_sf);
+                return AVERROR_INVALIDDATA;
+            }
+
+            sce->sfo[g*ics->max_sfb + sfb] = offset_sf - 100;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Decode and dequantize arithmetically coded, uniformly quantized value
+ *
+ * @param   coef            array of dequantized, scaled spectral data
+ * @param   sf              array of scalefactors or intensity stereo positions
+ * @param   gb              bitstream reader
+ * @param   state           persistent arithmetic-coder context state
+ * @param   reset           reset the context state (arith_reset_flag)
+ * @param   len             number of coded spectral coefficients
+ * @param   N               transform size (coefficients are decoded in pairs)
+ *
+ * @return  Returns error status. 0 - OK, !0 - error
+ *
+ * NOTE(review): sf is currently unused in this function — confirm whether
+ * scalefactor application is intentionally left to the caller.
+ */
+static int decode_spectrum_and_dequant_ac(AACDecContext *s, float coef[1024],
+                                          GetBitContext *gb, const float sf[120],
+                                          AACArithState *state, int reset,
+                                          uint16_t len, uint16_t N)
+{
+    AACArith ac;
+    int i, a, b;
+    uint32_t c;
+
+    int gb_count;
+    GetBitContext gb2;
+
+    ff_aac_ac_init(&ac, gb);
+    c = ff_aac_ac_map_process(state, reset, N);
+
+    /* Backup reader for rolling back by 14 bits at the end */
+    gb2 = (GetBitContext)*gb;
+    gb_count = get_bits_count(&gb2);
+
+    for (i = 0; i < len/2; i++) {
+        /* MSB */
+        int lvl, esc_nb, m;
+        c = ff_aac_ac_get_context(state, c, i, N);
+        for (lvl=esc_nb=0;;) {
+            uint32_t pki = ff_aac_ac_get_pk(c + (esc_nb << 17));
+            m = ff_aac_ac_decode(&ac, &gb2, ff_aac_ac_msb_cdfs[pki],
+                                 FF_ARRAY_ELEMS(ff_aac_ac_msb_cdfs[pki]));
+            if (m < FF_AAC_AC_ESCAPE)
+                break;
+            lvl++;
+
+            /* Cargo-culted value. */
+            if (lvl > 23)
+                return AVERROR(EINVAL);
+
+            if ((esc_nb = lvl) > 7)
+                esc_nb = 7;
+        }
+
+        /* m codes a 2x2-bit pair: b in the high two bits, a in the low two */
+        b = m >> 2;
+        a = m - (b << 2);
+
+        /* ARITH_STOP detection */
+        if (!m) {
+            if (esc_nb)
+                break;
+            a = b = 0;
+        }
+
+        /* LSB: append one extra bit per escape level to each of a and b */
+        for (int l = lvl; l > 0; l--) {
+            int lsbidx = !a ? 1 : (!b ? 0 : 2);
+            uint8_t r = ff_aac_ac_decode(&ac, &gb2, ff_aac_ac_lsb_cdfs[lsbidx],
+                                         FF_ARRAY_ELEMS(ff_aac_ac_lsb_cdfs[lsbidx]));
+            a = (a << 1) | (r & 1);
+            b = (b << 1) | ((r >> 1) & 1);
+        }
+
+        /* Dequantize coeffs here: x * cbrt(x) == x^(4/3) */
+        coef[2*i + 0] = a * cbrt(a);
+        coef[2*i + 1] = b * cbrt(b);
+        ff_aac_ac_update_context(state, i, a, b);
+    }
+
+    if (len > 1) {
+        /* "Rewind" bitstream back by 14 bits */
+        int gb_count2 = get_bits_count(&gb2);
+        skip_bits(gb, gb_count2 - gb_count - 14);
+    } else {
+        *gb = gb2;
+    }
+
+    ff_aac_ac_finish(state, i, N);
+
+    /* Zero the remaining (uncoded or post-STOP) coefficient pairs */
+    for (; i < N/2; i++) {
+        coef[2*i + 0] = 0;
+        coef[2*i + 1] = 0;
+    }
+
+    /* Signs */
+    for (i = 0; i < len; i++) {
+        if (coef[i]) {
+            if (!get_bits1(gb)) /* s */
+                coef[i] *= -1;
+        }
+    }
+
+    return 0;
+}
+
+/* cplx_pred_data() — complex stereo prediction coefficients.
+ * alpha_q_re/alpha_q_im are delta-decoded (in steps of 0.1) either over
+ * time (delta_code_time) or over frequency, and are shared across pairs
+ * of scalefactor bands (SFB_PER_PRED_BAND). */
+static int decode_usac_stereo_cplx(AACDecContext *ac, AACUsacStereo *us,
+                                   ChannelElement *cpe, GetBitContext *gb,
+                                   int num_window_groups, int indep_flag)
+{
+    int delta_code_time = 0;
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+
+    if (!get_bits1(gb)) { /* cplx_pred_all */
+        /* Per-band-pair prediction flags */
+        for (int g = 0; g < num_window_groups; g++) {
+            for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb += SFB_PER_PRED_BAND) {
+                const uint8_t val = get_bits1(gb);
+                us->pred_used[g*cpe->max_sfb_ste + sfb] = val;
+                if ((sfb + 1) < cpe->max_sfb_ste)
+                    us->pred_used[g*cpe->max_sfb_ste + sfb + 1] = val;
+            }
+        }
+    } else {
+        /* Prediction enabled on all bands */
+        for (int g = 0; g < num_window_groups; g++)
+            for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++)
+                us->pred_used[g*cpe->max_sfb_ste + sfb] = 1;
+    }
+
+    us->pred_dir = get_bits1(gb);
+    us->complex_coef = get_bits1(gb);
+
+    us->use_prev_frame = 0;
+    if (us->complex_coef && !indep_flag)
+        us->use_prev_frame = get_bits1(gb);
+
+    if (!indep_flag)
+        delta_code_time = get_bits1(gb);
+
+    /* TODO: shouldn't be needed */
+    for (int g = 0; g < num_window_groups; g++) {
+        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb += SFB_PER_PRED_BAND) {
+            float last_alpha_q_re = 0;
+            float last_alpha_q_im = 0;
+            if (delta_code_time) {
+                if (g) {
+                    /* Predict from the previous window group of this frame */
+                    last_alpha_q_re = us->prev_alpha_q_re[(g - 1)*cpe->max_sfb_ste + sfb];
+                    last_alpha_q_im = us->prev_alpha_q_im[(g - 1)*cpe->max_sfb_ste + sfb];
+                } else if ((ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) &&
+                           ics->window_sequence[1] == EIGHT_SHORT_SEQUENCE ||
+                           ics->window_sequence[1] == EIGHT_SHORT_SEQUENCE) {
+                    /* NOTE(review): "(A && B) || B" reduces to just "B" — the
+                     * window_sequence[0] test is redundant as written; confirm
+                     * the intended condition for short-window transitions. */
+                    /* The spec doesn't explicitly mention this, but it doesn't make
+                     * any other sense otherwise! */
+                    last_alpha_q_re = us->prev_alpha_q_re[7*cpe->max_sfb_ste + sfb];
+                    last_alpha_q_im = us->prev_alpha_q_im[7*cpe->max_sfb_ste + sfb];
+                } else {
+                    last_alpha_q_re = us->prev_alpha_q_re[g*cpe->max_sfb_ste + sfb];
+                    last_alpha_q_im = us->prev_alpha_q_im[g*cpe->max_sfb_ste + sfb];
+                }
+            } else {
+                if (sfb) {
+                    /* Predict from the previous band pair */
+                    last_alpha_q_re = us->alpha_q_re[g*cpe->max_sfb_ste + sfb - 1];
+                    last_alpha_q_im = us->alpha_q_im[g*cpe->max_sfb_ste + sfb - 1];
+                }
+            }
+
+            if (us->pred_used[g*cpe->max_sfb_ste + sfb]) {
+                /* Deltas reuse the scalefactor VLC, centered at 60, step 0.1 */
+                int val = -get_vlc2(gb, ff_vlc_scalefactors, 7, 3) + 60;
+                last_alpha_q_re += val * 0.1f;
+                if (us->complex_coef) {
+                    val = -get_vlc2(gb, ff_vlc_scalefactors, 7, 3) + 60;
+                    last_alpha_q_im += val * 0.1f;
+                }
+                us->alpha_q_re[g*cpe->max_sfb_ste + sfb] = last_alpha_q_re;
+                us->alpha_q_im[g*cpe->max_sfb_ste + sfb] = last_alpha_q_im;
+            } else {
+                us->alpha_q_re[g*cpe->max_sfb_ste + sfb] = 0;
+                us->alpha_q_im[g*cpe->max_sfb_ste + sfb] = 0;
+            }
+
+            /* Both bands of a pair share the same coefficients */
+            if ((sfb + 1) < cpe->max_sfb_ste) {
+                us->alpha_q_re[g*cpe->max_sfb_ste + sfb + 1] =
+                    us->alpha_q_re[g*cpe->max_sfb_ste + sfb];
+                us->alpha_q_im[g*cpe->max_sfb_ste + sfb + 1] =
+                    us->alpha_q_im[g*cpe->max_sfb_ste + sfb];
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* Configure window and scalefactor-band tables for one channel from the
+ * transmitted window sequence and (for short windows) the grouping bits. */
+static int setup_sce(AACDecContext *ac, SingleChannelElement *sce,
+                     AACUSACConfig *usac)
+{
+    AACUsacElemData *ue = &sce->ue;
+    IndividualChannelStream *ics = &sce->ics;
+
+    /* Setup window parameters */
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        /* 768-sample frames use the dedicated 96-sample short-window tables */
+        if (usac->core_frame_len == 768) {
+            ics->swb_offset = ff_swb_offset_96[usac->rate_idx];
+            ics->num_swb = ff_aac_num_swb_96[usac->rate_idx];
+        } else {
+            ics->swb_offset = ff_swb_offset_128[usac->rate_idx];
+            ics->num_swb = ff_aac_num_swb_128[usac->rate_idx];
+        }
+        ics->tns_max_bands = ff_tns_max_bands_128[usac->rate_idx];
+
+        /* Setup scalefactor grouping. 7 bit mask. */
+        ics->num_window_groups = 0;
+        for (int j = 0; j < 7; j++) {
+            ics->group_len[j] = 1;
+            /* A set bit merges window j+1 into the current group */
+            if (ue->scale_factor_grouping & (1 << (6 - j)))
+                ics->group_len[ics->num_window_groups] += 1;
+            else
+                ics->num_window_groups++;
+        }
+
+        ics->group_len[7] = 1;
+        ics->num_window_groups++;
+        ics->num_windows = 8;
+    } else {
+        if (usac->core_frame_len == 768) {
+            ics->swb_offset = ff_swb_offset_768[usac->rate_idx];
+            ics->num_swb = ff_aac_num_swb_768[usac->rate_idx];
+        } else {
+            ics->swb_offset = ff_swb_offset_1024[usac->rate_idx];
+            ics->num_swb = ff_aac_num_swb_1024[usac->rate_idx];
+        }
+        ics->tns_max_bands = ff_tns_max_bands_1024[usac->rate_idx];
+
+        ics->group_len[0] = 1;
+        ics->num_window_groups = 1;
+        ics->num_windows  = 1;
+    }
+
+    if (ics->max_sfb > ics->num_swb) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Number of scalefactor bands in group (%d) "
+               "exceeds limit (%d).\n",
+               ics->max_sfb, ics->num_swb);
+        return AVERROR(EINVAL);
+    }
+
+    /* Just some defaults for the band types */
+    for (int i = 0; i < FF_ARRAY_ELEMS(sce->band_type); i++)
+        sce->band_type[i] = ESC_BT;
+
+    return 0;
+}
+
+/* StereoCoreToolInfo() — common window, M/S masking mode and TNS
+ * signalling shared between both channels of a CPE. */
+static int decode_usac_stereo_info(AACDecContext *ac, AACUSACConfig *usac,
+                                   AACUsacElemConfig *ec, ChannelElement *cpe,
+                                   GetBitContext *gb, int indep_flag)
+{
+    int ret, tns_active;
+
+    AACUsacStereo *us = &cpe->us;
+    SingleChannelElement *sce1 = &cpe->ch[0];
+    SingleChannelElement *sce2 = &cpe->ch[1];
+    IndividualChannelStream *ics1 = &sce1->ics;
+    IndividualChannelStream *ics2 = &sce2->ics;
+    AACUsacElemData *ue1 = &sce1->ue;
+    AACUsacElemData *ue2 = &sce2->ue;
+
+    us->common_window = 0;
+    us->common_tw = 0;
+
+    /* Stereo info is only present when both channels use the FD core (mode 0) */
+    if (!(!ue1->core_mode && !ue2->core_mode))
+        return 0;
+
+    tns_active = get_bits1(gb);
+    us->common_window = get_bits1(gb);
+
+    if (us->common_window) {
+        /* ics_info() */
+        ics1->window_sequence[1] = ics1->window_sequence[0];
+        ics2->window_sequence[1] = ics2->window_sequence[0];
+        ics1->window_sequence[0] = ics2->window_sequence[0] = get_bits(gb, 2);
+
+        ics1->use_kb_window[1] = ics1->use_kb_window[0];
+        ics2->use_kb_window[1] = ics2->use_kb_window[0];
+        ics1->use_kb_window[0] = ics2->use_kb_window[0] = get_bits1(gb);
+
+        if (ics1->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+            ics1->max_sfb = ics2->max_sfb = get_bits(gb, 4);
+            ue1->scale_factor_grouping = ue2->scale_factor_grouping = get_bits(gb, 7);
+        } else {
+            ics1->max_sfb = ics2->max_sfb = get_bits(gb, 6);
+        }
+
+        if (!get_bits1(gb)) { /* common_max_sfb */
+            if (ics2->window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+                ics2->max_sfb = get_bits(gb, 4);
+            else
+                ics2->max_sfb = get_bits(gb, 6);
+        }
+
+        ret = setup_sce(ac, sce1, usac);
+        if (ret < 0)
+            return ret;
+
+        ret = setup_sce(ac, sce2, usac);
+        if (ret < 0)
+            return ret;
+
+        cpe->max_sfb_ste = FFMAX(ics1->max_sfb, ics2->max_sfb);
+
+        /* 0: no M/S, 1: per-band mask, 2: all bands, 3: complex prediction */
+        us->ms_mask_mode = get_bits(gb, 2); /* ms_mask_present */
+        memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
+        if (us->ms_mask_mode == 1) {
+            for (int g = 0; g < ics1->num_window_groups; g++)
+                for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++)
+                    cpe->ms_mask[g*cpe->max_sfb_ste + sfb] = get_bits1(gb);
+        } else if (us->ms_mask_mode == 2) {
+            memset(cpe->ms_mask, 0xFF, sizeof(cpe->ms_mask));
+        } else if ((us->ms_mask_mode == 3) && !ec->stereo_config_index) {
+            ret = decode_usac_stereo_cplx(ac, us, cpe, gb,
+                                          ics1->num_window_groups, indep_flag);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    if (ec->tw_mdct) {
+        us->common_tw = get_bits1(gb);
+        avpriv_report_missing_feature(ac->avctx,
+                                      "AAC USAC timewarping");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    sce1->tns.present = sce2->tns.present = 0;
+    if (tns_active) {
+        av_unused int tns_on_lr;
+        int common_tns = 0;
+        if (us->common_window)
+            common_tns = get_bits1(gb);
+
+        tns_on_lr = get_bits1(gb);
+        if (common_tns) {
+            ret = ff_aac_decode_tns(ac, &sce1->tns, gb, ics1);
+            if (ret < 0)
+                return ret;
+            memcpy(&sce2->tns, &sce1->tns, sizeof(sce1->tns));
+            /* NOTE(review): both present flags are cleared right after the
+             * common TNS data is parsed and copied — confirm it is applied
+             * through another path, otherwise it is silently dropped. */
+            sce2->tns.present = 0;
+            sce1->tns.present = 0;
+        } else {
+            if (get_bits1(gb)) {
+                sce2->tns.present = 1;
+                sce1->tns.present = 1;
+            } else {
+                /* TNS on exactly one channel */
+                sce2->tns.present = get_bits1(gb);
+                sce1->tns.present = !sce2->tns.present;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* 7.2.4 Generation of random signs for spectral noise filling
+ * This function is exactly defined, though we've helped the definition
+ * along with being slightly faster. */
+static inline float noise_random_sign(unsigned int *seed)
+{
+    /* LCG state update; bit 16 of the new state selects the sign. */
+    *seed = (*seed) * 69069 + 5;
+    return ((*seed) & 0x10000) ? -1.f : +1.f;
+}
+
+static void apply_noise_fill(AACDecContext *ac, SingleChannelElement *sce,
+                             AACUsacElemData *ue)
+{ /* 7.2: fill fully-quantized-to-zero bins above band_off with noise */
+    float *coef;
+    IndividualChannelStream *ics = &sce->ics;
+
+    float noise_val = pow(2, (ue->noise.level - 14) / 3.0); /* 2^((lvl-14)/3): real-valued division, not integer */
+    int noise_offset = ue->noise.offset - 16;
+    int band_off;
+
+    band_off = ff_usac_noise_fill_start_offset[ac->oc[1].m4ac.frame_length_short]
+                                              [ics->num_windows == 8];
+
+    coef = sce->coeffs;
+    for (int g = 0; g < ics->num_window_groups; g++) {
+        unsigned g_len = ics->group_len[g];
+
+        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
+            float *cb = coef + ics->swb_offset[sfb];
+            int cb_len = ics->swb_offset[sfb + 1] - ics->swb_offset[sfb];
+            int band_quantized_to_zero = 1;
+
+            if (ics->swb_offset[sfb] < band_off)
+                continue;
+
+            for (int group = 0; group < (unsigned)g_len; group++, cb += 128) {
+                for (int z = 0; z < cb_len; z++) {
+                    if (cb[z] == 0)
+                        cb[z] = noise_random_sign(&sce->ue.noise.seed) * noise_val;
+                    else
+                        band_quantized_to_zero = 0;
+                }
+            }
+
+            if (band_quantized_to_zero) /* all-zero bands get the sf offset compensation */
+                sce->sf[g*ics->max_sfb + sfb] += noise_offset;
+        }
+        coef += g_len << 7;
+    }
+}
+
+static void spectrum_scale(AACDecContext *ac, SingleChannelElement *sce,
+                           AACUsacElemData *ue)
+{ /* Noise substitution followed by per-band scalefactor application */
+    IndividualChannelStream *ics = &sce->ics;
+    float *coef;
+
+    /* Synthesise noise (7.2); a noise level of 0 disables noise filling */
+    if (ue->noise.level)
+        apply_noise_fill(ac, sce, ue);
+
+    /* Apply scalefactors */
+    coef = sce->coeffs;
+    for (int g = 0; g < ics->num_window_groups; g++) {
+        unsigned g_len = ics->group_len[g];
+
+        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
+            float *cb = coef + ics->swb_offset[sfb];
+            int cb_len = ics->swb_offset[sfb + 1] - ics->swb_offset[sfb];
+            float sf = sce->sf[g*ics->max_sfb + sfb];
+
+            for (int group = 0; group < (unsigned)g_len; group++, cb += 128)
+                ac->fdsp->vector_fmul_scalar(cb, cb, sf, cb_len);
+        }
+        coef += g_len << 7; /* each window in the group spans 128 bins */
+    }
+}
+
+static void complex_stereo_downmix_prev(AACDecContext *ac, ChannelElement *cpe,
+                                        float *dmix_re)
+{ /* Real part of the previous frame's downmix, used for MDST estimation */
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+    int sign = !cpe->us.pred_dir ? +1 : -1;
+    float *coef1 = cpe->ch[0].prev_coeffs; /* previous frame's decoded spectra */
+    float *coef2 = cpe->ch[1].prev_coeffs; /* (fix: was ->coeffs, i.e. current frame) */
+
+    for (int g = 0; g < ics->num_window_groups; g++) {
+        unsigned g_len = ics->group_len[g];
+        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+            int off = ics->swb_offset[sfb];
+            int cb_len = ics->swb_offset[sfb + 1] - off;
+
+            float *c1 = coef1 + off;
+            float *c2 = coef2 + off;
+            float *dm = dmix_re + off;
+
+            for (int group = 0; group < (unsigned)g_len;
+                 group++, c1 += 128, c2 += 128, dm += 128) {
+                for (int z = 0; z < cb_len; z++)
+                    dm[z] = 0.5*(c1[z] + sign*c2[z]);
+            }
+        }
+
+        coef1 += g_len << 7;
+        coef2 += g_len << 7;
+        dmix_re += g_len << 7;
+    }
+}
+
+static void complex_stereo_downmix_cur(AACDecContext *ac, ChannelElement *cpe,
+                                       float *dmix_re)
+{ /* Real part of the current frame's downmix, used for MDST estimation */
+    AACUsacStereo *us = &cpe->us;
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+    int sign = !cpe->us.pred_dir ? +1 : -1; /* pred_dir swaps mid/side roles */
+    float *coef1 = cpe->ch[0].coeffs;
+    float *coef2 = cpe->ch[1].coeffs;
+
+    for (int g = 0; g < ics->num_window_groups; g++) {
+        unsigned g_len = ics->group_len[g];
+        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+            int off = ics->swb_offset[sfb];
+            int cb_len = ics->swb_offset[sfb + 1] - off;
+
+            float *c1 = coef1 + off;
+            float *c2 = coef2 + off;
+            float *dm = dmix_re + off;
+
+            if (us->pred_used[g*cpe->max_sfb_ste + sfb]) {
+                for (int group = 0; group < (unsigned)g_len;
+                     group++, c1 += 128, c2 += 128, dm += 128) {
+                    for (int z = 0; z < cb_len; z++)
+                        dm[z] = 0.5*(c1[z] + sign*c2[z]);
+                }
+            } else { /* prediction off: channel 0 already holds the downmix */
+                for (int group = 0; group < (unsigned)g_len;
+                     group++, c1 += 128, c2 += 128, dm += 128) {
+                    for (int z = 0; z < cb_len; z++)
+                        dm[z] = c1[z];
+                }
+            }
+        }
+
+        coef1 += g_len << 7;
+        coef2 += g_len << 7;
+        dmix_re += g_len << 7;
+    }
+}
+
+static void complex_stereo_interpolate_imag(float *im, float *re, const float f[7],
+                                            int len, int factor_even, int factor_odd)
+{ /* 7-tap FIR MDST estimate from MDCT bins; f[0]..f[6] are all read, so f[7] */
+    int i = 0;
+    float s;
+
+    s = f[6]*re[2] + f[5]*re[1] + f[4]*re[0] +
+        f[3]*re[0] +
+        f[2]*re[1] + f[1]*re[2] + f[0]*re[3];
+    im[i] += s*factor_even;
+
+    i = 1;
+    s = f[6]*re[1] + f[5]*re[0] + f[4]*re[0] +
+        f[3]*re[1] +
+        f[2]*re[2] + f[1]*re[3] + f[0]*re[4];
+    im[i] += s*factor_odd;
+
+    i = 2;
+    s = f[6]*re[0] + f[5]*re[0] + f[4]*re[1] +
+        f[3]*re[2] +
+        f[2]*re[3] + f[1]*re[4] + f[0]*re[5];
+
+    im[i] += s*factor_even;
+    for (i = 3; i < len - 4; i += 2) { /* bulk of the spectrum, two bins per pass */
+        s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+            f[3]*re[i] +
+            f[2]*re[i+1] + f[1]*re[i+2] + f[0]*re[i+3];
+        im[i+0] += s*factor_odd;
+
+        s = f[6]*re[i-2] + f[5]*re[i-1] + f[4]*re[i] +
+            f[3]*re[i+1] +
+            f[2]*re[i+2] + f[1]*re[i+3] + f[0]*re[i+4];
+        im[i+1] += s*factor_even;
+    }
+
+    i = len - 3;
+    s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+        f[3]*re[i] +
+        f[2]*re[i+1] + f[1]*re[i+2] + f[0]*re[i+2];
+    im[i] += s*factor_odd;
+
+    i = len - 2;
+    s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+        f[3]*re[i] +
+        f[2]*re[i+1] + f[1]*re[i+1] + f[0]*re[i];
+    im[i] += s*factor_even;
+
+    i = len - 1;
+    s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+        f[3]*re[i] +
+        f[2]*re[i] + f[1]*re[i-1] + f[0]*re[i-2];
+    im[i] += s*factor_odd;
+}
+
+static void apply_complex_stereo(AACDecContext *ac, ChannelElement *cpe)
+{ /* Complex-prediction stereo: reconstruct both channels from downmix + residual */
+    AACUsacStereo *us = &cpe->us;
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+    float *coef1 = cpe->ch[0].coeffs;
+    float *coef2 = cpe->ch[1].coeffs;
+    float *dmix_im = us->dmix_im; /* MDST estimate of the downmix */
+
+    for (int g = 0; g < ics->num_window_groups; g++) {
+        unsigned g_len = ics->group_len[g];
+        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+            int off = ics->swb_offset[sfb];
+            int cb_len = ics->swb_offset[sfb + 1] - off;
+
+            float *c1 = coef1 + off;
+            float *c2 = coef2 + off;
+            float *dm_im = dmix_im + off;
+            float alpha_re = us->alpha_q_re[g*cpe->max_sfb_ste + sfb];
+            float alpha_im = us->alpha_q_im[g*cpe->max_sfb_ste + sfb];
+
+            if (!us->pred_used[g*cpe->max_sfb_ste + sfb])
+                continue;
+
+            if (!cpe->us.pred_dir) { /* ch0 holds the downmix; derive the side */
+                for (int group = 0; group < (unsigned)g_len;
+                     group++, c1 += 128, c2 += 128, dm_im += 128) {
+                    for (int z = 0; z < cb_len; z++) {
+                        float side;
+                        side = c2[z] - alpha_re*c1[z] - alpha_im*dm_im[z];
+                        c2[z] = c1[z] - side;
+                        c1[z] = c1[z] + side;
+                    }
+                }
+            } else { /* reversed direction: derive the mid from ch0 */
+                for (int group = 0; group < (unsigned)g_len;
+                     group++, c1 += 128, c2 += 128, dm_im += 128) {
+                    for (int z = 0; z < cb_len; z++) {
+                        float mid;
+                        mid = c2[z] - alpha_re*c1[z] - alpha_im*dm_im[z];
+                        c2[z] = mid - c1[z];
+                        c1[z] = mid + c1[z];
+                    }
+                }
+            }
+        }
+
+        coef1 += g_len << 7;
+        coef2 += g_len << 7;
+        dmix_im += g_len << 7;
+    }
+}
+
+static const float *complex_stereo_get_filter(ChannelElement *cpe, int is_prev)
+{
+    int win, shape;
+    if (!is_prev) {
+        switch (cpe->ch[0].ics.window_sequence[0]) {
+        default:
+        case ONLY_LONG_SEQUENCE:
+        case EIGHT_SHORT_SEQUENCE:
+            win = 0;
+            break;
+        case LONG_START_SEQUENCE:
+            win = 1;
+            break;
+        case LONG_STOP_SEQUENCE:
+            win = 2;
+            break;
+        }
+
+        if (cpe->ch[0].ics.use_kb_window[0] == 0 &&
+            cpe->ch[0].ics.use_kb_window[1] == 0)
+            shape = 0; /* sine+sine */
+        else if (cpe->ch[0].ics.use_kb_window[0] == 1 &&
+                 cpe->ch[0].ics.use_kb_window[1] == 1)
+            shape = 1; /* kbd+kbd */
+        else if (cpe->ch[0].ics.use_kb_window[0] == 0 &&
+                 cpe->ch[0].ics.use_kb_window[1] == 1)
+            shape = 2; /* sine+kbd */
+        else
+            shape = 3; /* kbd+sine */
+
+        return ff_aac_usac_mdst_filt_cur[win][shape];
+    }
+
+    /* The previous frame's contribution uses its own filter table */
+    win = cpe->ch[0].ics.window_sequence[0] == LONG_STOP_SEQUENCE;
+    shape = cpe->ch[0].ics.use_kb_window[1];
+
+    return ff_aac_usac_mdst_filt_prev[win][shape];
+}
+
+static void spectrum_decode(AACDecContext *ac, AACUSACConfig *usac,
+                            ChannelElement *cpe, int nb_channels)
+{ /* Post-bitstream spectral processing: scaling, stereo, TNS, inverse MDCT */
+    AACUsacStereo *us = &cpe->us;
+
+    for (int ch = 0; ch < nb_channels; ch++) {
+        SingleChannelElement *sce = &cpe->ch[ch];
+        AACUsacElemData *ue = &sce->ue;
+
+        spectrum_scale(ac, sce, ue);
+    }
+
+    if (nb_channels > 1) {
+        ac->dsp.apply_mid_side_stereo(ac, cpe);
+
+        if (us->ms_mask_mode == 3) { /* complex prediction mode */
+            const float *filt;
+            complex_stereo_downmix_cur(ac, cpe, us->dmix_re);
+            complex_stereo_downmix_prev(ac, cpe, us->prev_dmix_re);
+
+            filt = complex_stereo_get_filter(cpe, 0);
+            complex_stereo_interpolate_imag(us->dmix_im, us->dmix_re, filt,
+                                            usac->core_frame_len, 1, 1);
+            if (us->use_prev_frame) {
+                filt = complex_stereo_get_filter(cpe, 1);
+                complex_stereo_interpolate_imag(us->dmix_im, us->prev_dmix_re, filt,
+                                                usac->core_frame_len, -1, 1);
+            }
+
+            apply_complex_stereo(ac, cpe);
+        }
+    }
+
+    /* Save coefficients and alpha values for prediction reasons */
+    if (nb_channels > 1) {
+        AACUsacStereo *us = &cpe->us; /* NOTE(review): shadows the outer `us` */
+        for (int ch = 0; ch < nb_channels; ch++) {
+            SingleChannelElement *sce = &cpe->ch[ch];
+            memcpy(sce->prev_coeffs, sce->coeffs, sizeof(sce->coeffs));
+        }
+        memcpy(us->prev_alpha_q_re, us->alpha_q_re, sizeof(us->alpha_q_re));
+        memcpy(us->prev_alpha_q_im, us->alpha_q_im, sizeof(us->alpha_q_im));
+    }
+
+    for (int ch = 0; ch < nb_channels; ch++) {
+        SingleChannelElement *sce = &cpe->ch[ch];
+
+        /* Apply TNS */
+        if (sce->tns.present)
+            ac->dsp.apply_tns(sce->coeffs, &sce->tns, &sce->ics, 1);
+
+        ac->oc[1].m4ac.frame_length_short ? ac->dsp.imdct_and_windowing_768(ac, sce) :
+                                            ac->dsp.imdct_and_windowing(ac, sce);
+    }
+}
+
+static int decode_usac_core_coder(AACDecContext *ac, AACUSACConfig *usac,
+                                  AACUsacElemConfig *ec, ChannelElement *che,
+                                  GetBitContext *gb, int indep_flag, int nb_channels)
+{ /* Parse and decode one SCE/CPE payload (mono or stereo core coder) */
+    int ret;
+    int arith_reset_flag;
+    AACUsacStereo *us = &che->us;
+
+    /* Local symbols */
+    uint8_t global_gain;
+
+    us->common_window = 0;
+    che->ch[0].tns.present = che->ch[1].tns.present = 0;
+
+    for (int ch = 0; ch < nb_channels; ch++) {
+        SingleChannelElement *sce = &che->ch[ch];
+        AACUsacElemData *ue = &sce->ue;
+
+        ue->core_mode = get_bits1(gb); /* 0: frequency domain, 1: LPD */
+    }
+
+    if (nb_channels == 2) {
+        ret = decode_usac_stereo_info(ac, usac, ec, che, gb, indep_flag);
+        if (ret)
+            return ret;
+    }
+
+    for (int ch = 0; ch < nb_channels; ch++) {
+        SingleChannelElement *sce = &che->ch[ch];
+        IndividualChannelStream *ics = &sce->ics;
+        AACUsacElemData *ue = &sce->ue;
+
+        if (ue->core_mode) { /* lpd_channel_stream */
+            ret = ff_aac_ldp_parse_channel_stream(ac, usac, ue, gb);
+            if (ret < 0)
+                return ret;
+        } /* NOTE(review): falls through to fd_channel_stream parsing below -- confirm for LPD */
+
+        if ((nb_channels == 1) ||
+            (che->ch[0].ue.core_mode != che->ch[1].ue.core_mode))
+            sce->tns.present = get_bits1(gb);
+
+        /* fd_channel_stream */
+        global_gain = get_bits(gb, 8);
+
+        ue->noise.level = 0; /* 0 disables noise filling in spectrum_scale() */
+        if (ec->noise_fill) {
+            ue->noise.level = get_bits(gb, 3);
+            ue->noise.offset = get_bits(gb, 5);
+        }
+
+        if (!us->common_window) {
+            /* ics_info() */
+            ics->window_sequence[1] = ics->window_sequence[0];
+            ics->window_sequence[0] = get_bits(gb, 2);
+            ics->use_kb_window[1] = ics->use_kb_window[0];
+            ics->use_kb_window[0] = get_bits1(gb);
+            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+                ics->max_sfb = get_bits(gb, 4);
+                ue->scale_factor_grouping = get_bits(gb, 7);
+            } else {
+                ics->max_sfb = get_bits(gb, 6);
+            }
+
+            ret = setup_sce(ac, sce, usac);
+            if (ret < 0)
+                return ret;
+        }
+
+        if (ec->tw_mdct && !us->common_tw) {
+            /* tw_data() */
+            if (get_bits1(gb)) { /* tw_data_present */
+                /* Time warping is not supported in baseline profile streams. */
+                avpriv_report_missing_feature(ac->avctx,
+                                              "AAC USAC timewarping");
+                return AVERROR_PATCHWELCOME;
+            }
+        }
+
+        ret = decode_usac_scale_factors(ac, sce, gb, global_gain);
+        if (ret < 0)
+            return ret;
+
+        ac->dsp.dequant_scalefactors(sce);
+
+        if (sce->tns.present) {
+            ret = ff_aac_decode_tns(ac, &sce->tns, gb, ics);
+            if (ret < 0)
+                return ret;
+        }
+
+        /* ac_spectral_data */
+        arith_reset_flag = indep_flag; /* independent frames always reset the coder */
+        if (!arith_reset_flag)
+            arith_reset_flag = get_bits1(gb);
+
+        /* Decode coeffs */
+        memset(&sce->coeffs[0], 0, 1024*sizeof(float)); /* zero the full buffer even for 768-sample frames */
+        for (int win = 0; win < ics->num_windows; win++) {
+            int lg = ics->swb_offset[ics->max_sfb];
+            int N;
+            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+                N = usac->core_frame_len / 8;
+            else
+                N = usac->core_frame_len;
+
+            ret = decode_spectrum_and_dequant_ac(ac, sce->coeffs + win*128, gb,
+                                                 sce->sf, &ue->ac,
+                                                 arith_reset_flag && (win == 0),
+                                                 lg, N);
+            if (ret < 0)
+                return ret;
+        }
+
+        if (get_bits1(gb)) { /* fac_data_present */
+            const uint16_t len_8 = usac->core_frame_len / 8;
+            const uint16_t len_16 = usac->core_frame_len / 16;
+            const uint16_t fac_len = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? len_8 : len_16;
+            ret = ff_aac_parse_fac_data(ue, gb, 1, fac_len);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    /* Value is unset otherwise */
+    if (nb_channels > 1 && !us->common_window)
+        che->max_sfb_ste = FFMAX(che->ch[0].ics.max_sfb, che->ch[1].ics.max_sfb);
+
+    spectrum_decode(ac, usac, che, nb_channels);
+
+    return 0;
+}
+
+static int parse_audio_preroll(AACDecContext *ac, GetBitContext *gb)
+{ /* AudioPreRoll() payload (7.18.3): apply new config, decode warm-up AUs */
+    int ret = 0;
+    GetBitContext gbc;
+    OutputConfiguration *oc = &ac->oc[1];
+    MPEG4AudioConfig *m4ac = &oc->m4ac;
+    MPEG4AudioConfig m4ac_bak = oc->m4ac;
+    uint8_t temp_data[512];
+    uint8_t *tmp_buf = temp_data;
+    size_t tmp_buf_size = sizeof(temp_data);
+
+    av_unused int crossfade;
+    int num_preroll_frames;
+
+    int config_len = get_escaped_value(gb, 4, 4, 8);
+
+    /* Implementations are free to pad the config to any length, so use a
+     * different reader for this. */
+    gbc = *gb;
+    ret = ff_aac_usac_config_decode(ac, ac->avctx, &gbc, oc, m4ac->chan_config);
+    if (ret < 0) {
+        *m4ac = m4ac_bak;
+        return ret;
+    } else {
+        ac->oc[1].m4ac.chan_config = 0; /* NOTE(review): makes the memcmp below always differ when the old chan_config was nonzero -- confirm */
+    }
+
+    /* 7.18.3.3 Bitrate adaption
+     * If configuration didn't change after applying preroll, continue
+     * without decoding it. */
+    if (!memcmp(m4ac, &m4ac_bak, sizeof(m4ac_bak)))
+        return 0;
+
+    skip_bits_long(gb, config_len*8);
+
+    crossfade = get_bits1(gb); /* applyCrossfade */
+    skip_bits1(gb); /* reserved */
+    num_preroll_frames = get_escaped_value(gb, 2, 4, 0); /* numPreRollFrames */
+
+    for (int i = 0; i < num_preroll_frames; i++) {
+        int got_frame_ptr = 0;
+        int au_len = get_escaped_value(gb, 16, 16, 0);
+
+        if (au_len*8 > tmp_buf_size) { /* au_len is in bytes; the *8 overallocates but is safe */
+            uint8_t *tmp2;
+            tmp_buf = tmp_buf == temp_data ? NULL : tmp_buf;
+            tmp2 = av_realloc(tmp_buf, au_len*8); /* av_realloc to match the av_free below */
+            if (!tmp2) {
+                if (tmp_buf != temp_data)
+                    av_free(tmp_buf);
+                return AVERROR(ENOMEM);
+            }
+            tmp_buf = tmp2;
+        }
+
+        /* Byte alignment is not guaranteed. */
+        for (int i = 0; i < au_len; i++)
+            tmp_buf[i] = get_bits(gb, 8);
+
+        ret = init_get_bits8(&gbc, tmp_buf, au_len);
+        if (ret < 0)
+            break;
+
+        ret = ff_aac_usac_decode_frame(ac->avctx, ac, &gbc, &got_frame_ptr);
+        if (ret < 0)
+            break;
+    }
+
+    if (tmp_buf != temp_data)
+        av_free(tmp_buf);
+
+    return 0;
+}
+
+static int parse_ext_ele(AACDecContext *ac, AACUsacElemConfig *e,
+                         GetBitContext *gb)
+{ /* usacExtElement(): accumulate (possibly fragmented) payload, then dispatch */
+    uint8_t *tmp;
+    uint8_t pl_frag_start = 1;
+    uint8_t pl_frag_end = 1;
+    uint32_t len;
+
+    if (!get_bits1(gb)) /* usacExtElementPresent */
+        return 0;
+
+    if (get_bits1(gb)) { /* usacExtElementUseDefaultLength */
+        len = e->ext.default_len;
+    } else {
+        len = get_bits(gb, 8); /* usacExtElementPayloadLength */
+        if (len == 255)
+            len += get_bits(gb, 16) - 2;
+    }
+
+    if (!len)
+        return 0;
+
+    if (e->ext.payload_frag) {
+        pl_frag_start = get_bits1(gb); /* usacExtElementStart */
+        pl_frag_end = get_bits1(gb); /* usacExtElementStop */
+    }
+
+    if (pl_frag_start)
+        e->ext.pl_data_offset = 0;
+
+    /* If an extension starts and ends this packet, we can directly use it */
+    if (!(pl_frag_start && pl_frag_end)) {
+        tmp = av_realloc(e->ext.pl_data, e->ext.pl_data_offset + len);
+        if (!tmp) {
+            av_freep(&e->ext.pl_data); /* NULL it: a stale pointer here would be reused next call */
+            return AVERROR(ENOMEM);
+        }
+        e->ext.pl_data = tmp;
+
+        /* Readout data to a buffer */
+        for (int i = 0; i < len; i++)
+            e->ext.pl_data[e->ext.pl_data_offset + i] = get_bits(gb, 8);
+    }
+
+    e->ext.pl_data_offset += len;
+
+    if (pl_frag_end) {
+        int ret = 0;
+        int start_bits = get_bits_count(gb);
+        const int pl_len = e->ext.pl_data_offset;
+        GetBitContext *gb2 = gb;
+        GetBitContext gbc;
+        if (!(pl_frag_start && pl_frag_end)) {
+            ret = init_get_bits8(&gbc, e->ext.pl_data, pl_len);
+            if (ret < 0)
+                return ret;
+
+            gb2 = &gbc;
+        }
+
+        switch (e->ext.type) {
+        case ID_EXT_ELE_FILL:
+            /* Filler elements have no usable payload */
+            break;
+        case ID_EXT_ELE_AUDIOPREROLL:
+            ret = parse_audio_preroll(ac, gb2);
+            break;
+        default:
+            /* This should never happen */
+            av_assert0(0);
+        }
+        av_freep(&e->ext.pl_data);
+        if (ret < 0)
+            return ret;
+
+        skip_bits_long(gb, pl_len*8 - (get_bits_count(gb) - start_bits)); /* NOTE(review): looks like it over-skips when earlier fragments were copied out -- verify */
+    }
+
+    return 0;
+}
+
+int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac,
+                             GetBitContext *gb, int *got_frame_ptr)
+{ /* Decode one usacFrame(): iterate the configured elements in order */
+    int ret, nb_ch_el, is_dmono = 0;
+    int indep_flag, samples = 0;
+    int audio_found = 0, sce_count = 0;
+    AVFrame *frame = ac->frame;
+
+    ff_aac_output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
+                            ac->oc[1].status, 0);
+
+    indep_flag = get_bits1(gb); /* usacIndependencyFlag: frame decodable on its own */
+
+    nb_ch_el = 0;
+    for (int i = 0; i < ac->oc[1].usac.nb_elems; i++) {
+        AACUsacElemConfig *e = &ac->oc[1].usac.elems[i];
+        ChannelElement *che;
+
+        switch (e->type) {
+        case ID_USAC_SCE:
+            che = ff_aac_get_che(ac, TYPE_SCE, nb_ch_el++);
+            if (!che) {
+                av_log(ac->avctx, AV_LOG_ERROR,
+                       "channel element %d.%d is not allocated\n",
+                       TYPE_SCE, nb_ch_el - 1);
+                return AVERROR_INVALIDDATA;
+            }
+
+            ret = decode_usac_core_coder(ac, &ac->oc[1].usac, e, che, gb,
+                                         indep_flag, 1);
+            if (ret < 0)
+                return ret;
+
+            sce_count++;
+            audio_found = 1;
+            che->present = 1;
+            samples = ac->oc[1].m4ac.frame_length_short ? 768 : 1024;
+            break;
+        case ID_USAC_CPE:
+            che = ff_aac_get_che(ac, TYPE_CPE, nb_ch_el++);
+            if (!che) {
+                av_log(ac->avctx, AV_LOG_ERROR,
+                       "channel element %d.%d is not allocated\n",
+                       TYPE_CPE, nb_ch_el - 1); /* was TYPE_SCE: copy-paste in the log */
+                return AVERROR_INVALIDDATA;
+            }
+
+            ret = decode_usac_core_coder(ac, &ac->oc[1].usac, e, che, gb,
+                                         indep_flag, 2);
+            if (ret < 0)
+                return ret;
+
+            audio_found = 1;
+            che->present = 1;
+            samples = ac->oc[1].m4ac.frame_length_short ? 768 : 1024;
+            break;
+        case ID_USAC_LFE:
+            avpriv_report_missing_feature(ac->avctx,
+                                          "AAC USAC LFE");
+            return AVERROR_PATCHWELCOME;
+            break;
+        case ID_USAC_EXT:
+            ret = parse_ext_ele(ac, e, gb);
+            if (ret < 0)
+                return ret;
+            break;
+        }
+    }
+
+    if (ac->oc[1].status && audio_found) {
+        avctx->sample_rate = ac->oc[1].m4ac.sample_rate;
+        avctx->frame_size = samples;
+        ac->oc[1].status = OC_LOCKED;
+    }
+
+    if (!frame->data[0] && samples) {
+        av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (samples) {
+        frame->nb_samples = samples;
+        frame->sample_rate = avctx->sample_rate;
+        frame->flags = indep_flag ? AV_FRAME_FLAG_KEY : 0x0; /* NOTE(review): assignment clobbers any other frame flags */
+        *got_frame_ptr = 1;
+    } else {
+        av_frame_unref(ac->frame);
+        frame->flags = indep_flag ? AV_FRAME_FLAG_KEY : 0x0;
+        *got_frame_ptr = 0;
+    }
+
+    /* for dual-mono audio (SCE + SCE) */
+    is_dmono = ac->dmono_mode && sce_count == 2 &&
+               !av_channel_layout_compare(&ac->oc[1].ch_layout,
+                                          &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
+    if (is_dmono) {
+        if (ac->dmono_mode == 1)
+            frame->data[1] = frame->data[0];
+        else if (ac->dmono_mode == 2)
+            frame->data[0] = frame->data[1];
+    }
+
+    return 0;
+}
diff --git a/libavcodec/aac/aacdec_usac.h b/libavcodec/aac/aacdec_usac.h
new file mode 100644
index 0000000000..635c85acb7
--- /dev/null
+++ b/libavcodec/aac/aacdec_usac.h
@@ -0,0 +1,37 @@ 
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_USAC_H
+#define AVCODEC_AAC_AACDEC_USAC_H
+
+#include "aacdec.h"
+
+#include "libavcodec/get_bits.h"
+
+int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx,
+                              GetBitContext *gb, OutputConfiguration *oc,
+                              int channel_config);
+
+int ff_aac_usac_reset_state(AACDecContext *ac, OutputConfiguration *oc);
+
+int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac,
+                             GetBitContext *gb, int *got_frame_ptr);
+
+#endif /* AVCODEC_AAC_AACDEC_USAC_H */
diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c
index 18afa69bad..7b040531aa 100644
--- a/libavcodec/aactab.c
+++ b/libavcodec/aactab.c
@@ -1998,6 +1998,11 @@  const uint8_t ff_tns_max_bands_128[] = {
 };
 // @}
 
+const uint8_t ff_usac_noise_fill_start_offset[2][2] = {
+    { 160, 20 },  /* 1024-sample frames: [long windows, eight-short windows] */
+    { 120, 15 },  /*  768-sample frames: [long windows, eight-short windows] */
+};
+
 const DECLARE_ALIGNED(32, float, ff_aac_eld_window_512)[1920] = {
      0.00338834,  0.00567745,  0.00847677,  0.01172641,
      0.01532555,  0.01917664,  0.02318809,  0.02729259,
@@ -3895,3 +3900,40 @@  DECLARE_ALIGNED(16, const float, ff_aac_deemph_weights)[16] = {
     0,
     USAC_EMPH_COEFF,
 };
+
+const int ff_aac_usac_samplerate[32] = {
+    96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
+    16000, 12000, 11025,  8000,  7350,    -1,    -1, 57600, /* indices 13-14: reserved */
+    51200, 40000, 38400, 34150, 28800, 25600, 20000, 19200,
+    17075, 14400, 12800, 9600,     -1,    -1,    -1,    -1, /* indices 28-31: reserved */
+};
+
+/* Window type (only long+eight, start/stop/stopstart), sine+sine, kbd+kbd, sine+kbd, kbd+sine */
+const float ff_aac_usac_mdst_filt_cur[4 /* Window */][4 /* Shape */][7] =
+{
+    { { 0.000000,  0.000000,  0.500000, 0.000000, -0.500000,  0.000000,  0.000000 },
+      { 0.091497,  0.000000,  0.581427, 0.000000, -0.581427,  0.000000, -0.091497 },
+      { 0.045748,  0.057238,  0.540714, 0.000000, -0.540714, -0.057238, -0.045748 },
+      { 0.045748, -0.057238,  0.540714, 0.000000, -0.540714,  0.057238, -0.045748 } },
+    { { 0.102658,  0.103791,  0.567149, 0.000000, -0.567149, -0.103791, -0.102658 },
+      { 0.150512,  0.047969,  0.608574, 0.000000, -0.608574, -0.047969, -0.150512 },
+      { 0.104763,  0.105207,  0.567861, 0.000000, -0.567861, -0.105207, -0.104763 },
+      { 0.148406,  0.046553,  0.607863, 0.000000, -0.607863, -0.046553, -0.148406 } },
+    { { 0.102658, -0.103791,  0.567149, 0.000000, -0.567149,  0.103791, -0.102658 },
+      { 0.150512, -0.047969,  0.608574, 0.000000, -0.608574,  0.047969, -0.150512 },
+      { 0.148406, -0.046553,  0.607863, 0.000000, -0.607863,  0.046553, -0.148406 },
+      { 0.104763, -0.105207,  0.567861, 0.000000, -0.567861,  0.105207, -0.104763 } },
+    { { 0.205316,  0.000000,  0.634298, 0.000000, -0.634298,  0.000000, -0.205316 },
+      { 0.209526,  0.000000,  0.635722, 0.000000, -0.635722,  0.000000, -0.209526 },
+      { 0.207421,  0.001416,  0.635010, 0.000000, -0.635010, -0.001416, -0.207421 },
+      { 0.207421, -0.001416,  0.635010, 0.000000, -0.635010,  0.001416, -0.207421 } }
+};
+
+/* Window type (everything/longstop+stopstart), sine or kbd */
+const float ff_aac_usac_mdst_filt_prev[2 /* Window */][2 /* sine/kbd */][7] =
+{
+    { { 0.000000, 0.106103, 0.250000, 0.318310, 0.250000, 0.106103, 0.000000 },
+      { 0.059509, 0.123714, 0.186579, 0.213077, 0.186579, 0.123714, 0.059509 } },
+    { { 0.038498, 0.039212, 0.039645, 0.039790, 0.039645, 0.039212, 0.038498 },
+      { 0.026142, 0.026413, 0.026577, 0.026631, 0.026577, 0.026413, 0.026142 } }
+};
diff --git a/libavcodec/aactab.h b/libavcodec/aactab.h
index 481fc57d93..8dbb2098c5 100644
--- a/libavcodec/aactab.h
+++ b/libavcodec/aactab.h
@@ -115,4 +115,14 @@  extern const uint8_t ff_tns_max_bands_512 [13];
 extern const uint8_t ff_tns_max_bands_480 [13];
 extern const uint8_t ff_tns_max_bands_128 [13];
 
+/* [x][y], x == 1 -> frame len is 768 frames, y == 1 -> is eight_short */
+extern const uint8_t ff_usac_noise_fill_start_offset[2][2];
+
+extern const int ff_aac_usac_samplerate[32];
+
+/* Window type (only long+eight, start/stop/stopstart), sine+sine, kbd+kbd, sine+kbd, kbd+sine */
+extern const float ff_aac_usac_mdst_filt_cur[4 /* Window */][4 /* Shape */][7];
+/* Window type (everything/longstop+stopstart), sine or kbd */
+extern const float ff_aac_usac_mdst_filt_prev[2 /* Window */][2 /* sine/kbd */][7];
+
 #endif /* AVCODEC_AACTAB_H */