diff mbox series

[FFmpeg-devel,v4,1/2] libavcodec/flacdec: Implement decoding of 32 bit-per-sample PCM

Message ID 20220903092144.569716-2-mvanb1@gmail.com
State Superseded
Headers show
Series 32bps FLAC patches | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martijn van Beurden Sept. 3, 2022, 9:21 a.m. UTC
Add decoding of FLAC files coding for 32 bit-per-sample PCM to libavcodec.
---
 libavcodec/flac.c     |   4 +-
 libavcodec/flacdec.c  | 248 ++++++++++++++++++++++++++++++++++++++----
 libavcodec/get_bits.h |  12 ++
 libavcodec/mathops.h  |   9 ++
 4 files changed, 250 insertions(+), 23 deletions(-)

Comments

Andreas Rheinhardt Sept. 14, 2022, 1:03 p.m. UTC | #1
Martijn van Beurden:
> Add decoding of FLAC files coding for 32 bit-per-sample PCM to libavcodec.
> ---
>  libavcodec/flac.c     |   4 +-
>  libavcodec/flacdec.c  | 248 ++++++++++++++++++++++++++++++++++++++----
>  libavcodec/get_bits.h |  12 ++
>  libavcodec/mathops.h  |   9 ++
>  4 files changed, 250 insertions(+), 23 deletions(-)
> 
> diff --git a/libavcodec/flac.c b/libavcodec/flac.c
> index 352d663c67..174b4801be 100644
> --- a/libavcodec/flac.c
> +++ b/libavcodec/flac.c
> @@ -28,7 +28,7 @@
>  #include "flacdata.h"
>  #include "flac_parse.h"
>  
> -static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 0 };
> +static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 32 };
>  
>  static const AVChannelLayout flac_channel_layouts[8] = {
>      AV_CHANNEL_LAYOUT_MONO,
> @@ -82,7 +82,7 @@ int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
>  
>      /* bits per sample */
>      bps_code = get_bits(gb, 3);
> -    if (bps_code == 3 || bps_code == 7) {
> +    if (bps_code == 3) {
>          av_log(avctx, AV_LOG_ERROR + log_level_offset,
>                 "invalid sample size code (%d)\n",
>                 bps_code);
> diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
> index c5d9e95168..14a9202569 100644
> --- a/libavcodec/flacdec.c
> +++ b/libavcodec/flacdec.c
> @@ -64,6 +64,9 @@ typedef struct FLACContext {
>      int32_t *decoded[FLAC_MAX_CHANNELS];    ///< decoded samples
>      uint8_t *decoded_buffer;
>      unsigned int decoded_buffer_size;
> +    int64_t *decoded_33bps;                  ///< decoded samples for a 33 bps subframe
> +    uint8_t *decoded_buffer_33bps;

Why do you need a new buffer here instead of just reusing decoded_buffer?

> +    unsigned int decoded_buffer_size_33bps;
>      int buggy_lpc;                          ///< use workaround for old lavc encoded files
>  
>      FLACDSPContext dsp;
> @@ -154,6 +157,24 @@ static int allocate_buffers(FLACContext *s)
>                                   s->stream_info.channels,
>                                   s->stream_info.max_blocksize,
>                                   AV_SAMPLE_FMT_S32P, 0);
> +    if (ret >= 0 && s->stream_info.bps == 32 && s->stream_info.channels == 2) {
> +        buf_size = av_samples_get_buffer_size(NULL, 1,
> +                                              s->stream_info.max_blocksize,
> +                                              AV_SAMPLE_FMT_S64P, 0);
> +        if (buf_size < 0)
> +            return buf_size;
> +
> +        av_fast_malloc(&s->decoded_buffer_33bps, &s->decoded_buffer_size_33bps, buf_size);
> +        if (!s->decoded_buffer)

You are checking the wrong variable.

> +            return AVERROR(ENOMEM);
> +
> +        ret = av_samples_fill_arrays((uint8_t **)&s->decoded_33bps, NULL,
> +                                     s->decoded_buffer_33bps,
> +                                     1,
> +                                     s->stream_info.max_blocksize,
> +                                     AV_SAMPLE_FMT_S64P, 0);
> +
> +    }
>      return ret < 0 ? ret : 0;
>  }
>  
> @@ -331,6 +352,94 @@ static int decode_subframe_fixed(FLACContext *s, int32_t *decoded,
>      return 0;
>  }
>  
> +static int decode_subframe_fixed_wide(FLACContext *s, int32_t *decoded,
> +                                      int pred_order, int bps)
> +{
> +    const int blocksize = s->blocksize;
> +    int i;
> +    int ret;
> +
> +    /* warm up samples */
> +    for (i = 0; i < pred_order; i++) {
> +        decoded[i] = get_sbits_long(&s->gb, bps);
> +    }
> +
> +    if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
> +        return ret;
> +
> +    switch (pred_order) {
> +    case 0:
> +        break;
> +    case 1:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] += decoded[i-1];
> +        break;
> +    case 2:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = (int64_t)decoded[i] + 2*(int64_t)decoded[i-1] - (int64_t)decoded[i-2];
> +        break;
> +    case 3:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = (int64_t)decoded[i] + 3*(int64_t)decoded[i-1] - 3*(int64_t)decoded[i-2] + (int64_t)decoded[i-3];
> +        break;
> +    case 4:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = (int64_t)decoded[i] + 4*(int64_t)decoded[i-1] - 6*(int64_t)decoded[i-2] + 4*(int64_t)decoded[i-3] - (int64_t)decoded[i-4];
> +        break;
> +    default:
> +        av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    return 0;
> +}
> +
> +
> +static int decode_subframe_fixed_33bps(FLACContext *s, int64_t *decoded,
> +                                       int32_t *residual, int pred_order)
> +{
> +    const int blocksize = s->blocksize;
> +    int i;
> +    int ret;
> +
> +    /* warm up samples */
> +    for (i = 0; i < pred_order; i++) {
> +        decoded[i] = get_sbits64(&s->gb, 33);
> +        av_log(s->avctx, AV_LOG_DEBUG, "warm-up %d = %" PRId64 "\n", i, decoded[i]);
> +    }
> +
> +    if ((ret = decode_residuals(s, residual, pred_order)) < 0)
> +        return ret;
> +
> +    switch (pred_order) {
> +    case 0:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = residual[i];
> +        break;
> +    case 1:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = residual[i] + decoded[i-1];
> +        break;
> +    case 2:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = residual[i] + 2*decoded[i-1] - decoded[i-2];
> +        break;
> +    case 3:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = residual[i] + 3*decoded[i-1] - 3*decoded[i-2] + decoded[i-3];
> +        break;
> +    case 4:
> +        for (i = pred_order; i < blocksize; i++)
> +            decoded[i] = residual[i] + 4*decoded[i-1] - 6*decoded[i-2] + 4*decoded[i-3] - decoded[i-4];
> +        break;
> +    default:
> +        av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    return 0;
> +}
> +
>  static void lpc_analyze_remodulate(SUINT32 *decoded, const int coeffs[32],
>                                     int order, int qlevel, int len, int bps)
>  {
> @@ -402,12 +511,53 @@ static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order,
>      return 0;
>  }
>  
> +static int decode_subframe_lpc_33bps(FLACContext *s, int64_t *decoded,
> +                                     int32_t *residual, int pred_order)
> +{
> +    int i, j, ret;
> +    int coeff_prec, qlevel;
> +    int coeffs[32];
> +
> +    /* warm up samples */
> +    for (i = 0; i < pred_order; i++) {
> +        decoded[i] = get_sbits64(&s->gb, 33);
> +    }
> +
> +    coeff_prec = get_bits(&s->gb, 4) + 1;
> +    if (coeff_prec == 16) {
> +        av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +    qlevel = get_sbits(&s->gb, 5);
> +    if (qlevel < 0) {
> +        av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe buggy stream\n",
> +               qlevel);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    for (i = 0; i < pred_order; i++) {
> +        coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
> +    }
> +
> +    if ((ret = decode_residuals(s, residual, pred_order)) < 0)
> +        return ret;
> +
> +    for (i = pred_order; i < s->blocksize; i++, decoded++) {
> +        int64_t sum = 0;
> +        for (j = 0; j < pred_order; j++)
> +            sum += (int64_t)coeffs[j] * decoded[j];
> +        decoded[j] = residual[i] + (sum >> qlevel);
> +    }
> +
> +    return 0;
> +}
> +
>  static inline int decode_subframe(FLACContext *s, int channel)
>  {
>      int32_t *decoded = s->decoded[channel];
>      int type, wasted = 0;
>      int bps = s->stream_info.bps;
> -    int i, tmp, ret;
> +    int i, ret;
>  
>      if (channel == 0) {
>          if (s->ch_mode == FLAC_CHMODE_RIGHT_SIDE)
> @@ -436,34 +586,63 @@ static inline int decode_subframe(FLACContext *s, int channel)
>          wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb));
>          bps -= wasted;
>      }
> -    if (bps > 32) {
> -        avpriv_report_missing_feature(s->avctx, "Decorrelated bit depth > 32");
> -        return AVERROR_PATCHWELCOME;
> -    }
>  
>  //FIXME use av_log2 for types
>      if (type == 0) {
> -        tmp = get_sbits_long(&s->gb, bps);
> -        for (i = 0; i < s->blocksize; i++)
> -            decoded[i] = tmp;
> +        if (bps < 33) {
> +            int32_t tmp = get_sbits_long(&s->gb, bps);
> +            for (i = 0; i < s->blocksize; i++)
> +                decoded[i] = tmp;
> +        } else {
> +            int64_t tmp = get_sbits64(&s->gb, 33);
> +            for (i = 0; i < s->blocksize; i++)
> +                s->decoded_33bps[i] = tmp;
> +        }
>      } else if (type == 1) {
> -        for (i = 0; i < s->blocksize; i++)
> -            decoded[i] = get_sbits_long(&s->gb, bps);
> +        if (bps < 33) {
> +            for (i = 0; i < s->blocksize; i++)
> +                decoded[i] = get_sbits_long(&s->gb, bps);
> +        } else {
> +            for (i = 0; i < s->blocksize; i++)
> +                s->decoded_33bps[i] = get_sbits64(&s->gb, 33);
> +        }
>      } else if ((type >= 8) && (type <= 12)) {
> -        if ((ret = decode_subframe_fixed(s, decoded, type & ~0x8, bps)) < 0)
> -            return ret;
> +        int order = type & ~0x8;
> +        if (bps < 33) {
> +            if (bps + order <= 32) {
> +                if ((ret = decode_subframe_fixed(s, decoded, order, bps)) < 0)
> +                    return ret;
> +            } else {
> +                if ((ret = decode_subframe_fixed_wide(s, decoded, order, bps)) < 0)
> +                    return ret;
> +            }
> +        } else {
> +            if ((ret = decode_subframe_fixed_33bps(s, s->decoded_33bps, decoded, order)) < 0)
> +                return ret;
> +        }
>      } else if (type >= 32) {
> -        if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
> -            return ret;
> +        if (bps < 33) {
> +            if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
> +                return ret;
> +        } else {
> +            if ((ret = decode_subframe_lpc_33bps(s, s->decoded_33bps, decoded, (type & ~0x20)+1)) < 0)
> +                return ret;
> +        }
>      } else {
>          av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
>          return AVERROR_INVALIDDATA;
>      }
>  
> -    if (wasted && wasted < 32) {
> -        int i;
> -        for (i = 0; i < s->blocksize; i++)
> -            decoded[i] = (unsigned)decoded[i] << wasted;
> +    if (wasted) {
> +        if (wasted+bps == 33) {
> +            int i;
> +            for (i = 0; i < s->blocksize; i++)
> +                s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
> +        } else if (wasted < 32) {
> +            int i;
> +            for (i = 0; i < s->blocksize; i++)
> +                decoded[i] = (unsigned)decoded[i] << wasted;
> +        }
>      }
>  
>      return 0;
> @@ -554,6 +733,26 @@ static int decode_frame(FLACContext *s)
>      return 0;
>  }
>  
> +static void decorrelate_33bps(int ch_mode, int32_t **decoded, int64_t *decoded_33bps, int len)
> +{
> +    int i;
> +    if (ch_mode == FLAC_CHMODE_LEFT_SIDE ) {
> +        for (i = 0; i < len; i++)
> +           decoded[1][i] = decoded[0][i] - decoded_33bps[i];
> +    } else if (ch_mode == FLAC_CHMODE_RIGHT_SIDE ) {
> +        for (i = 0; i < len; i++)
> +           decoded[0][i] = decoded[1][i] + decoded_33bps[i];
> +    } else if (ch_mode == FLAC_CHMODE_MID_SIDE ) {
> +        for (i = 0; i < len; i++) {
> +            uint64_t a = decoded[0][i];
> +            int64_t b = decoded_33bps[i];
> +            a -= b >> 1;
> +            decoded[0][i] = (a + b);
> +            decoded[1][i] = a;
> +        }
> +    }
> +}
> +
>  static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
>                               int *got_frame_ptr, AVPacket *avpkt)
>  {
> @@ -612,9 +811,15 @@ static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
>      if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
>          return ret;
>  
> -    s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
> -                                   s->stream_info.channels,
> -                                   s->blocksize, s->sample_shift);
> +    if (s->stream_info.bps == 32 && s->ch_mode > 0) {
> +        decorrelate_33bps(s->ch_mode, s->decoded, s->decoded_33bps, s->blocksize);
> +        s->dsp.decorrelate[0](frame->data, s->decoded, s->stream_info.channels,
> +                              s->blocksize, s->sample_shift);
> +    } else {
> +        s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
> +                                       s->stream_info.channels,
> +                                       s->blocksize, s->sample_shift);
> +    }
>  
>      if (bytes_read > buf_size) {
>          av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", bytes_read - buf_size);
> @@ -635,6 +840,7 @@ static av_cold int flac_decode_close(AVCodecContext *avctx)
>      FLACContext *s = avctx->priv_data;
>  
>      av_freep(&s->decoded_buffer);
> +    av_freep(&s->decoded_buffer_33bps);
>  
>      return 0;
>  }
> diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
> index 992765dc92..52d13b8242 100644
> --- a/libavcodec/get_bits.h
> +++ b/libavcodec/get_bits.h
> @@ -596,6 +596,18 @@ static inline int get_sbits_long(GetBitContext *s, int n)
>      return sign_extend(get_bits_long(s, n), n);
>  }
>  
> +/**
> + * Read 0-64 bits as a signed integer.
> + */
> +static inline int64_t get_sbits64(GetBitContext *s, int n)
> +{
> +    // sign_extend(x, 0) is undefined
> +    if (!n)
> +        return 0;
> +
> +    return sign_extend64(get_bits64(s, n), n);
> +}
> +
>  /**
>   * Show 0-32 bits.
>   */
> diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
> index f81d21f9c4..8a82d9d086 100644
> --- a/libavcodec/mathops.h
> +++ b/libavcodec/mathops.h
> @@ -137,6 +137,15 @@ static inline av_const int sign_extend(int val, unsigned bits)
>  }
>  #endif
>  
> +#ifndef sign_extend64
> +static inline av_const int64_t sign_extend64(int64_t val, unsigned bits)
> +{
> +    unsigned shift = 8 * sizeof(int64_t) - bits;
> +    union { uint64_t u; int64_t s; } v = { (uint64_t) val << shift };
> +    return v.s >> shift;
> +}
> +#endif
> +
>  #ifndef zero_extend
>  static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
>  {
Martijn van Beurden Sept. 14, 2022, 3:21 p.m. UTC | #2
Op wo 14 sep. 2022 om 15:03 schreef Andreas Rheinhardt <
andreas.rheinhardt@outlook.com>:

> > --- a/libavcodec/flacdec.c
> > +++ b/libavcodec/flacdec.c
> > @@ -64,6 +64,9 @@ typedef struct FLACContext {
> >      int32_t *decoded[FLAC_MAX_CHANNELS];    ///< decoded samples
> >      uint8_t *decoded_buffer;
> >      unsigned int decoded_buffer_size;
> > +    int64_t *decoded_33bps;                  ///< decoded samples for a 33 bps subframe
> > +    uint8_t *decoded_buffer_33bps;
>
> Why do you need a new buffer here instead of just reusing decoded_buffer?
>
>
I don't follow, do you mean I could have decoded_33bps use decoded_buffer
too? Wouldn't that kind of aliasing lead to problems?


> > +        av_fast_malloc(&s->decoded_buffer_33bps, &s->decoded_buffer_size_33bps, buf_size);
> > +        if (!s->decoded_buffer)
>
> You are checking the wrong variable.
>
>
I'll fix that, thanks!
Paul B Mahol Sept. 14, 2022, 3:48 p.m. UTC | #3
On 9/14/22, Martijn van Beurden <mvanb1@gmail.com> wrote:
> Op wo 14 sep. 2022 om 15:03 schreef Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com>:
>
>> > --- a/libavcodec/flacdec.c
>> > +++ b/libavcodec/flacdec.c
>> > @@ -64,6 +64,9 @@ typedef struct FLACContext {
>> >      int32_t *decoded[FLAC_MAX_CHANNELS];    ///< decoded samples
>> >      uint8_t *decoded_buffer;
>> >      unsigned int decoded_buffer_size;
>> > +    int64_t *decoded_33bps;                  ///< decoded samples for a 33 bps subframe
>> > +    uint8_t *decoded_buffer_33bps;
>>
>> Why do you need a new buffer here instead of just reusing decoded_buffer?
>>
>>
> I don't follow, do you mean I could have decoded_33bps use decoded_buffer
> too? Wouldn't that kind of aliasing lead to problems?

What kind of aliasing?

>
>
>> > +        av_fast_malloc(&s->decoded_buffer_33bps, &s->decoded_buffer_size_33bps, buf_size);
>> > +        if (!s->decoded_buffer)
>>
>> You are checking the wrong variable.
>>
>>
> I'll fix that, thanks!
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Andreas Rheinhardt Sept. 14, 2022, 5:10 p.m. UTC | #4
Martijn van Beurden:
> Op wo 14 sep. 2022 om 15:03 schreef Andreas Rheinhardt <
> andreas.rheinhardt@outlook.com>:
> 
>>> --- a/libavcodec/flacdec.c
>>> +++ b/libavcodec/flacdec.c
>>> @@ -64,6 +64,9 @@ typedef struct FLACContext {
>>>      int32_t *decoded[FLAC_MAX_CHANNELS];    ///< decoded samples
>>>      uint8_t *decoded_buffer;
>>>      unsigned int decoded_buffer_size;
>>> +    int64_t *decoded_33bps;                  ///< decoded samples for a 33 bps subframe
>>> +    uint8_t *decoded_buffer_33bps;
>>
>> Why do you need a new buffer here instead of just reusing decoded_buffer?
>>
>>
> I don't follow, do you mean I could have decoded_33bps use decoded_buffer
> too? Wouldn't that kind of aliasing lead to problems?
> 

I was actually asking you whether it could lead to any problems. Or in
other words, whether decoded_buffer and decoded_buffer_33bps are used at
the same time, because it appeared to me that they are not. Now that I
have taken a second look I know the answer to be "no" because of
decorrelate_33bps().

- Andreas
diff mbox series

Patch

diff --git a/libavcodec/flac.c b/libavcodec/flac.c
index 352d663c67..174b4801be 100644
--- a/libavcodec/flac.c
+++ b/libavcodec/flac.c
@@ -28,7 +28,7 @@ 
 #include "flacdata.h"
 #include "flac_parse.h"
 
-static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 0 };
+static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 32 };
 
 static const AVChannelLayout flac_channel_layouts[8] = {
     AV_CHANNEL_LAYOUT_MONO,
@@ -82,7 +82,7 @@  int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
 
     /* bits per sample */
     bps_code = get_bits(gb, 3);
-    if (bps_code == 3 || bps_code == 7) {
+    if (bps_code == 3) {
         av_log(avctx, AV_LOG_ERROR + log_level_offset,
                "invalid sample size code (%d)\n",
                bps_code);
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index c5d9e95168..14a9202569 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -64,6 +64,9 @@  typedef struct FLACContext {
     int32_t *decoded[FLAC_MAX_CHANNELS];    ///< decoded samples
     uint8_t *decoded_buffer;
     unsigned int decoded_buffer_size;
+    int64_t *decoded_33bps;                  ///< decoded samples for a 33 bps subframe
+    uint8_t *decoded_buffer_33bps;
+    unsigned int decoded_buffer_size_33bps;
     int buggy_lpc;                          ///< use workaround for old lavc encoded files
 
     FLACDSPContext dsp;
@@ -154,6 +157,24 @@  static int allocate_buffers(FLACContext *s)
                                  s->stream_info.channels,
                                  s->stream_info.max_blocksize,
                                  AV_SAMPLE_FMT_S32P, 0);
+    /* 32 bps stereo: one extra int64_t plane for the 33 bps subframe */
+    if (ret >= 0 && s->stream_info.bps == 32 && s->stream_info.channels == 2) {
+        buf_size = av_samples_get_buffer_size(NULL, 1,
+                                              s->stream_info.max_blocksize,
+                                              AV_SAMPLE_FMT_S64P, 0);
+        if (buf_size < 0)
+            return buf_size;
+
+        av_fast_malloc(&s->decoded_buffer_33bps, &s->decoded_buffer_size_33bps, buf_size);
+        if (!s->decoded_buffer_33bps) /* test the buffer that was just (re)allocated */
+            return AVERROR(ENOMEM);
+
+        ret = av_samples_fill_arrays((uint8_t **)&s->decoded_33bps, NULL,
+                                     s->decoded_buffer_33bps,
+                                     1,
+                                     s->stream_info.max_blocksize,
+                                     AV_SAMPLE_FMT_S64P, 0);
+    }
     return ret < 0 ? ret : 0;
 }
 
@@ -331,6 +352,94 @@  static int decode_subframe_fixed(FLACContext *s, int32_t *decoded,
     return 0;
 }
 
+/* Fixed-predictor subframe for bps + pred_order > 32: the residual and the
+ * prediction are summed in 64-bit arithmetic before being stored as int32_t. */
+static int decode_subframe_fixed_wide(FLACContext *s, int32_t *decoded,
+                                      int pred_order, int bps)
+{
+    const int blocksize = s->blocksize;
+    int i, ret;
+
+    /* warm up samples */
+    for (i = 0; i < pred_order; i++)
+        decoded[i] = get_sbits_long(&s->gb, bps);
+
+    if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
+        return ret;
+
+    switch (pred_order) {
+    case 0:
+        break;
+    case 1:
+        for (i = pred_order; i < blocksize; i++) /* widened: int32_t += can overflow */
+            decoded[i] = (int64_t)decoded[i] + (int64_t)decoded[i-1];
+        break;
+    case 2:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = (int64_t)decoded[i] + 2*(int64_t)decoded[i-1] - (int64_t)decoded[i-2];
+        break;
+    case 3:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = (int64_t)decoded[i] + 3*(int64_t)decoded[i-1] - 3*(int64_t)decoded[i-2] + (int64_t)decoded[i-3];
+        break;
+    case 4:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = (int64_t)decoded[i] + 4*(int64_t)decoded[i-1] - 6*(int64_t)decoded[i-2] + 4*(int64_t)decoded[i-3] - (int64_t)decoded[i-4];
+        break;
+    default:
+        av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+
+/* Fixed-predictor subframe with 33 bps samples: warm-up samples are read as
+ * 33-bit values into decoded[], residuals are read into residual[], and the
+ * predictor is evaluated in unsigned 64-bit arithmetic so it wraps, not overflows. */
+static int decode_subframe_fixed_33bps(FLACContext *s, int64_t *decoded,
+                                       int32_t *residual, int pred_order)
+{
+    const int blocksize = s->blocksize;
+    int i, ret;
+
+    /* warm up samples */
+    for (i = 0; i < pred_order; i++)
+        decoded[i] = get_sbits64(&s->gb, 33);
+
+    if ((ret = decode_residuals(s, residual, pred_order)) < 0)
+        return ret;
+
+    switch (pred_order) {
+    case 0:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = residual[i];
+        break;
+    case 1:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = (uint64_t)residual[i] + (uint64_t)decoded[i-1];
+        break;
+    case 2:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = (uint64_t)residual[i] + 2*(uint64_t)decoded[i-1] - (uint64_t)decoded[i-2];
+        break;
+    case 3:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = (uint64_t)residual[i] + 3*(uint64_t)decoded[i-1] - 3*(uint64_t)decoded[i-2] + (uint64_t)decoded[i-3];
+        break;
+    case 4:
+        for (i = pred_order; i < blocksize; i++)
+            decoded[i] = (uint64_t)residual[i] + 4*(uint64_t)decoded[i-1] - 6*(uint64_t)decoded[i-2] + 4*(uint64_t)decoded[i-3] - (uint64_t)decoded[i-4];
+        break;
+    default:
+        av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
 static void lpc_analyze_remodulate(SUINT32 *decoded, const int coeffs[32],
                                    int order, int qlevel, int len, int bps)
 {
@@ -402,12 +511,53 @@  static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order,
     return 0;
 }
 
+/* LPC subframe with 33 bps samples: warm-up samples go to decoded[], residuals
+ * to residual[]; the prediction is accumulated with wrapping 64-bit arithmetic. */
+static int decode_subframe_lpc_33bps(FLACContext *s, int64_t *decoded,
+                                     int32_t *residual, int pred_order)
+{
+    int i, j, ret;
+    int coeff_prec, qlevel;
+    int coeffs[32];
+
+    /* warm up samples */
+    for (i = 0; i < pred_order; i++)
+        decoded[i] = get_sbits64(&s->gb, 33);
+
+    coeff_prec = get_bits(&s->gb, 4) + 1;
+    if (coeff_prec == 16) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n");
+        return AVERROR_INVALIDDATA;
+    }
+    qlevel = get_sbits(&s->gb, 5);
+    if (qlevel < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe buggy stream\n",
+               qlevel);
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (i = 0; i < pred_order; i++) /* coefficients are stored in reverse order */
+        coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
+
+    if ((ret = decode_residuals(s, residual, pred_order)) < 0)
+        return ret;
+
+    for (i = pred_order; i < s->blocksize; i++, decoded++) {
+        int64_t sum = 0;
+        for (j = 0; j < pred_order; j++) /* unsigned product wraps instead of overflowing */
+            sum += (int64_t)coeffs[j] * (uint64_t)decoded[j];
+        decoded[j] = (uint64_t)residual[i] + (uint64_t)(sum >> qlevel);
+    }
+
+    return 0;
+}
+
 static inline int decode_subframe(FLACContext *s, int channel)
 {
     int32_t *decoded = s->decoded[channel];
     int type, wasted = 0;
     int bps = s->stream_info.bps;
-    int i, tmp, ret;
+    int i, ret;
 
     if (channel == 0) {
         if (s->ch_mode == FLAC_CHMODE_RIGHT_SIDE)
@@ -436,34 +586,63 @@  static inline int decode_subframe(FLACContext *s, int channel)
         wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb));
         bps -= wasted;
     }
-    if (bps > 32) {
-        avpriv_report_missing_feature(s->avctx, "Decorrelated bit depth > 32");
-        return AVERROR_PATCHWELCOME;
-    }
 
 //FIXME use av_log2 for types
     if (type == 0) {
-        tmp = get_sbits_long(&s->gb, bps);
-        for (i = 0; i < s->blocksize; i++)
-            decoded[i] = tmp;
+        if (bps < 33) {
+            int32_t tmp = get_sbits_long(&s->gb, bps);
+            for (i = 0; i < s->blocksize; i++)
+                decoded[i] = tmp;
+        } else {
+            int64_t tmp = get_sbits64(&s->gb, 33);
+            for (i = 0; i < s->blocksize; i++)
+                s->decoded_33bps[i] = tmp;
+        }
     } else if (type == 1) {
-        for (i = 0; i < s->blocksize; i++)
-            decoded[i] = get_sbits_long(&s->gb, bps);
+        if (bps < 33) {
+            for (i = 0; i < s->blocksize; i++)
+                decoded[i] = get_sbits_long(&s->gb, bps);
+        } else {
+            for (i = 0; i < s->blocksize; i++)
+                s->decoded_33bps[i] = get_sbits64(&s->gb, 33);
+        }
     } else if ((type >= 8) && (type <= 12)) {
-        if ((ret = decode_subframe_fixed(s, decoded, type & ~0x8, bps)) < 0)
-            return ret;
+        int order = type & ~0x8;
+        if (bps < 33) {
+            if (bps + order <= 32) {
+                if ((ret = decode_subframe_fixed(s, decoded, order, bps)) < 0)
+                    return ret;
+            } else {
+                if ((ret = decode_subframe_fixed_wide(s, decoded, order, bps)) < 0)
+                    return ret;
+            }
+        } else {
+            if ((ret = decode_subframe_fixed_33bps(s, s->decoded_33bps, decoded, order)) < 0)
+                return ret;
+        }
     } else if (type >= 32) {
-        if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
-            return ret;
+        if (bps < 33) {
+            if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
+                return ret;
+        } else {
+            if ((ret = decode_subframe_lpc_33bps(s, s->decoded_33bps, decoded, (type & ~0x20)+1)) < 0)
+                return ret;
+        }
     } else {
         av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
         return AVERROR_INVALIDDATA;
     }
 
-    if (wasted && wasted < 32) {
-        int i;
-        for (i = 0; i < s->blocksize; i++)
-            decoded[i] = (unsigned)decoded[i] << wasted;
+    if (wasted) {
+        if (wasted+bps == 33) {
+            int i;
+            for (i = 0; i < s->blocksize; i++)
+                s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
+        } else if (wasted < 32) {
+            int i;
+            for (i = 0; i < s->blocksize; i++)
+                decoded[i] = (unsigned)decoded[i] << wasted;
+        }
     }
 
     return 0;
@@ -554,6 +733,26 @@  static int decode_frame(FLACContext *s)
     return 0;
 }
 
+/* Undo stereo decorrelation in place when the side channel is in decoded_33bps[]. */
+static void decorrelate_33bps(int ch_mode, int32_t **decoded, int64_t *decoded_33bps, int len)
+{
+    int i;
+    if (ch_mode == FLAC_CHMODE_LEFT_SIDE ) {
+        for (i = 0; i < len; i++)
+           decoded[1][i] = decoded[0][i] - (uint64_t)decoded_33bps[i];
+    } else if (ch_mode == FLAC_CHMODE_RIGHT_SIDE ) {
+        for (i = 0; i < len; i++)
+           decoded[0][i] = decoded[1][i] + (uint64_t)decoded_33bps[i];
+    } else if (ch_mode == FLAC_CHMODE_MID_SIDE ) {
+        for (i = 0; i < len; i++) {
+            int64_t b = decoded_33bps[i];
+            uint64_t a = decoded[0][i] - (b >> 1); /* b >> 1 is halved, so this cannot overflow */
+            decoded[0][i] = (a + b);
+            decoded[1][i] = a;
+        }
+    }
+}
+
 static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                              int *got_frame_ptr, AVPacket *avpkt)
 {
@@ -612,9 +811,15 @@  static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
     if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
         return ret;
 
-    s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
-                                   s->stream_info.channels,
-                                   s->blocksize, s->sample_shift);
+    if (s->stream_info.bps == 32 && s->ch_mode > 0) {
+        decorrelate_33bps(s->ch_mode, s->decoded, s->decoded_33bps, s->blocksize);
+        s->dsp.decorrelate[0](frame->data, s->decoded, s->stream_info.channels,
+                              s->blocksize, s->sample_shift);
+    } else {
+        s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
+                                       s->stream_info.channels,
+                                       s->blocksize, s->sample_shift);
+    }
 
     if (bytes_read > buf_size) {
         av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", bytes_read - buf_size);
@@ -635,6 +840,7 @@  static av_cold int flac_decode_close(AVCodecContext *avctx)
     FLACContext *s = avctx->priv_data;
 
     av_freep(&s->decoded_buffer);
+    av_freep(&s->decoded_buffer_33bps);
 
     return 0;
 }
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 992765dc92..52d13b8242 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -596,6 +596,18 @@  static inline int get_sbits_long(GetBitContext *s, int n)
     return sign_extend(get_bits_long(s, n), n);
 }
 
+/**
+ * Read 0-64 bits as a signed integer.
+ * The n bits read are sign-extended to int64_t; n == 0 yields 0.
+ */
+static inline int64_t get_sbits64(GetBitContext *s, int n)
+{
+    /* sign_extend64(x, 0) would shift by the full type width, which is
+     * undefined, so nothing is read or extended for a zero-width field */
+    return n ? sign_extend64(get_bits64(s, n), n)
+             : 0;
+}
+
 /**
  * Show 0-32 bits.
  */
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index f81d21f9c4..8a82d9d086 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -137,6 +137,15 @@  static inline av_const int sign_extend(int val, unsigned bits)
 }
 #endif
 
+#ifndef sign_extend64
+/* Sign-extend the low `bits` bits of val to int64_t; bits == 0 makes the shift undefined. */
+static inline av_const int64_t sign_extend64(int64_t val, unsigned bits)
+{
+    const unsigned drop = 8 * sizeof(int64_t) - bits;
+    return (union { uint64_t u; int64_t s; }){ (uint64_t) val << drop }.s >> drop;
+}
+#endif
+
 #ifndef zero_extend
 static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
 {