diff mbox series

[FFmpeg-devel,v3] libavcodec/flacenc: add backward-compatible 32 bit-per-sample capability

Message ID 20220103202608.56364-1-mvanb1@gmail.com
State Superseded
Headers show
Series [FFmpeg-devel,v3] libavcodec/flacenc: add backward-compatible 32 bit-per-sample capability | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc fail Make fate failed

Commit Message

Martijn van Beurden Jan. 3, 2022, 8:26 p.m. UTC
Enables creation of FLAC files with up to 32 bits-per-sample, up from the
previous limit of 24 bit. This is a feature requested for RAWcooked, the
archiving community has a need for storing files with 32-bit integer audio
samples. See https://github.com/MediaArea/RAWcooked/issues/356

Restrictions to the encoder are added so created files are compatible with
existing decoders. Stereo decorrelation is disabled on 32 bit-per-sample,
because a side channel would need 33 bit-per-sample, causing problems in
existing 32 bit datapaths. Also only LPC encoding is enabled, because
decoders capable of processing 24-bit files already use 64-bit processing
for LPC, but not for fixed subframes.

Furthermore, predictions and residuals are checked for causing integer
overflow, reverting to a verbatim (store) subframe in case no LPC coeffs
can be found that do not cause overflow.

ffmpeg's FLAC decoder has been forward-compatible with this change since
commit c720b9ce98 (May 2015). libFLAC is forward-compatible since release
1.2.1 (September 2007), the flac command line tool however blocks 32-bit
files out of caution, it having been untested until now.

To create 32 bit files, -bits_per_raw_sample 32 must be specified because
of trac ticket 9563
---
 libavcodec/flacdsp.c | 45 +++++++++++++++++++++
 libavcodec/flacdsp.h |  3 ++
 libavcodec/flacenc.c | 94 +++++++++++++++++++++++++++++++++++++-------
 3 files changed, 127 insertions(+), 15 deletions(-)

Comments

Lynne Jan. 5, 2022, 3:57 a.m. UTC | #1
3 Jan 2022, 21:26 by mvanb1@gmail.com:

> +        if (pmax > 0) {
> +            if (lpc_reduction_tries >= 2)
> +                return 0;
> +            lpc_reduction_tries++;
> +            for (int i = 0; i < order; i++)
> +                coefs[i] = ((int64_t)coefs[i] * INT32_MAX) / pmax;
> +        }
> +    } while (pmax > 0);
> +    return 1;
> +}
>

Could you invert the return value of the function?
Return 0 if there's no overflow and 1 if there is? That's
usually how we use return values throughout the code
too, and it saves you from having to invert it when you
use it.


>  static void flac_lpc_16_c(int32_t *decoded, const int coeffs[32],
>  int pred_order, int qlevel, int len)
>  {
> diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
> index 7bb0dd0e9a..5978a4722a 100644
> --- a/libavcodec/flacdsp.h
> +++ b/libavcodec/flacdsp.h
> @@ -40,4 +40,7 @@ void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, i
>  void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps);
>  void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps);
>  
> +int ff_flacdsp_lpc_encode_c_32_overflow_detect(int32_t *res, const int32_t *smp, int len,
> +                                               int order, int32_t *coefs, int shift);
>

Could you also move the function to flacenc.c? The flacdsp.c
file gets compiled if either is enabled, but if flacenc.c isn't
enabled, then this code shouldn't be enabled, to save space.
The function isn't likely to get SIMD, and the compiler could
do more aggressive optimizations if it's in the same file.
 

>  put_sbits(&s->pb, sub->obits, res[0]);
>  } else if (sub->type == FLAC_SUBFRAME_VERBATIM) {
> -            while (res < frame_end)
> -                put_sbits(&s->pb, sub->obits, *res++);
> +            if (sub->obits < 32) {
> +                while (res < frame_end)
> +                    put_sbits(&s->pb, sub->obits, *res++);
> +            } else {
> +                while (res < frame_end)
> +                    put_bits32(&s->pb, *res++);
> +            }
>  } else {
>  /* warm-up samples */
> -            for (i = 0; i < sub->order; i++)
> -                put_sbits(&s->pb, sub->obits, *res++);
> +            if (sub->obits < 32) {
> +                for (i = 0; i < sub->order; i++)
> +                    put_sbits(&s->pb, sub->obits, *res++);
> +            }else{
> +                for (i = 0; i < sub->order; i++)
> +                    put_bits32(&s->pb, *res++);
> +            }
>

Small nit, but could you put spaces in the }else{

Apart from that, looks good.
diff mbox series

Patch

diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index bc9a5dbed9..b6c163981e 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -43,6 +43,51 @@ 
 #define PLANAR 1
 #include "flacdsp_template.c"
 
+#define ZIGZAG_32BIT_MAX  0x3FFFFFFF
+#define ZIGZAG_32BIT_MIN -0x3FFFFFFF
+
+int ff_flacdsp_lpc_encode_c_32_overflow_detect(int32_t *res, const int32_t *smp, int len,
+                                               int order, int32_t *coefs, int shift)
+{
+    /* This function checks for every prediction and every residual
+     * whether they cause integer overflow in existing decoders. In
+     * case the prediction exceeds int32_t limits, prediction
+     * coefficients are lowered accordingly. If the residual exceeds
+     * ZIGZAG_32BIT_MAX and _MIN or coefficients have been lowered
+     * twice but the prediction still overflows, give up */
+    int lpc_reduction_tries = 0;
+    int64_t pmax;
+    for (int i = 0; i < order; i++)
+        res[i] = smp[i];
+    do {
+        pmax = 0;
+        for (int i = order; i < len; i++) {
+            int64_t p = 0, tmp;
+            for (int j = 0; j < order; j++)
+                p += (int64_t)coefs[j]*smp[(i-1)-j];
+            p >>= shift;
+            tmp = smp[i] - p;
+            if (p > INT32_MAX && p > pmax)
+                pmax = p;
+            else if (p < INT32_MIN && (p * -1) > pmax)
+                pmax = p * -1;
+            if (tmp > ZIGZAG_32BIT_MAX || tmp < ZIGZAG_32BIT_MIN)
+                return 0;
+            res[i] = tmp;
+        }
+
+        if (pmax > 0) {
+            if (lpc_reduction_tries >= 2)
+                return 0;
+            lpc_reduction_tries++;
+            for (int i = 0; i < order; i++)
+                coefs[i] = ((int64_t)coefs[i] * INT32_MAX) / pmax;
+        }
+    } while (pmax > 0);
+    return 1;
+}
+
+
 static void flac_lpc_16_c(int32_t *decoded, const int coeffs[32],
                           int pred_order, int qlevel, int len)
 {
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 7bb0dd0e9a..5978a4722a 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -40,4 +40,7 @@  void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, i
 void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps);
 void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps);
 
+int ff_flacdsp_lpc_encode_c_32_overflow_detect(int32_t *res, const int32_t *smp, int len,
+                                               int order, int32_t *coefs, int shift);
+
 #endif /* AVCODEC_FLACDSP_H */
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 595928927d..bb7138acb3 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -254,10 +254,30 @@  static av_cold int flac_encode_init(AVCodecContext *avctx)
         s->bps_code                = 4;
         break;
     case AV_SAMPLE_FMT_S32:
-        if (avctx->bits_per_raw_sample != 24)
-            av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
-        avctx->bits_per_raw_sample = 24;
-        s->bps_code                = 6;
+        if (avctx->bits_per_raw_sample <= 24) {
+            if (avctx->bits_per_raw_sample < 24)
+                av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
+            avctx->bits_per_raw_sample = 24;
+            s->bps_code                = 6;
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "non-streamable bits-per-sample\n");
+            s->bps_code = 0;
+            if (avctx->bits_per_raw_sample == 0)
+                avctx->bits_per_raw_sample = 32;
+            if (s->options.lpc_type != FF_LPC_TYPE_LEVINSON &&
+                s->options.lpc_type != FF_LPC_TYPE_CHOLESKY) {
+                av_log(avctx, AV_LOG_WARNING, "forcing lpc_type levinson, lpc_type fixed or none not supported with >24 bits-per-sample FLAC\n");
+                s->options.lpc_type = FF_LPC_TYPE_LEVINSON;
+            }
+            if (avctx->bits_per_raw_sample == 32) {
+                /* Because stereo decorrelation can raise the bitdepth of
+                 * a subframe to 33 bits, we disable it */
+                if (s->options.ch_mode != FLAC_CHMODE_INDEPENDENT) {
+                    av_log(avctx, AV_LOG_WARNING, "disabling stereo decorrelation, not supported with 32 bits-per-sample FLAC\n");
+                    s->options.ch_mode = FLAC_CHMODE_INDEPENDENT;
+                }
+            }
+        }
         break;
     }
 
@@ -686,7 +706,7 @@  static uint64_t calc_rice_params(RiceContext *rc,
 
     tmp_rc.coding_mode = rc->coding_mode;
 
-    for (i = 0; i < n; i++)
+    for (i = pred_order; i < n; i++)
         udata[i] = (2 * data[i]) ^ (data[i] >> 31);
 
     calc_sum_top(pmax, exact ? kmax : 0, udata, n, pred_order, sums);
@@ -868,7 +888,11 @@  static int encode_residual_ch(FlacEncodeContext *s, int ch)
             order = av_clip(order, min_order - 1, max_order - 1);
             if (order == last_order)
                 continue;
-            if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(order) <= 32) {
+            if (s->avctx->bits_per_raw_sample > 24) {
+                if (!ff_flacdsp_lpc_encode_c_32_overflow_detect(res, smp, n, order+1,
+                                                                coefs[order], shift[order]))
+                    continue;
+            } else if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(order) <= 32) {
                 s->flac_dsp.lpc16_encode(res, smp, n, order+1, coefs[order],
                                          shift[order]);
             } else {
@@ -888,7 +912,11 @@  static int encode_residual_ch(FlacEncodeContext *s, int ch)
         opt_order = 0;
         bits[0]   = UINT32_MAX;
         for (i = min_order-1; i < max_order; i++) {
-            if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(i) <= 32) {
+            if (s->avctx->bits_per_raw_sample > 24) {
+                if (!ff_flacdsp_lpc_encode_c_32_overflow_detect(res, smp, n, i+1,
+                                                                coefs[i], shift[i]))
+                    continue;
+            } else if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(i) <= 32) {
                 s->flac_dsp.lpc16_encode(res, smp, n, i+1, coefs[i], shift[i]);
             } else {
                 s->flac_dsp.lpc32_encode(res, smp, n, i+1, coefs[i], shift[i]);
@@ -910,7 +938,11 @@  static int encode_residual_ch(FlacEncodeContext *s, int ch)
             for (i = last-step; i <= last+step; i += step) {
                 if (i < min_order-1 || i >= max_order || bits[i] < UINT32_MAX)
                     continue;
-                if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(i) <= 32) {
+                if (s->avctx->bits_per_raw_sample > 24) {
+                    if (!ff_flacdsp_lpc_encode_c_32_overflow_detect(res, smp, n, i+1,
+                                                                    coefs[i], shift[i]))
+                        continue;
+                } else if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(i) <= 32) {
                     s->flac_dsp.lpc32_encode(res, smp, n, i+1, coefs[i], shift[i]);
                 } else {
                     s->flac_dsp.lpc16_encode(res, smp, n, i+1, coefs[i], shift[i]);
@@ -951,7 +983,11 @@  static int encode_residual_ch(FlacEncodeContext *s, int ch)
                 if (diffsum >8)
                     continue;
 
-                if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(opt_order - 1) <= 32) {
+                if (s->avctx->bits_per_raw_sample > 24) {
+                    if (!ff_flacdsp_lpc_encode_c_32_overflow_detect(res, smp, n, opt_order,
+                                                                    lpc_try, shift[opt_order-1]))
+                        continue;
+                } else if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(opt_order-1) <= 32) {
                     s->flac_dsp.lpc16_encode(res, smp, n, opt_order, lpc_try, shift[opt_order-1]);
                 } else {
                     s->flac_dsp.lpc32_encode(res, smp, n, opt_order, lpc_try, shift[opt_order-1]);
@@ -972,7 +1008,16 @@  static int encode_residual_ch(FlacEncodeContext *s, int ch)
     for (i = 0; i < sub->order; i++)
         sub->coefs[i] = coefs[sub->order-1][i];
 
-    if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(opt_order) <= 32) {
+    if (s->avctx->bits_per_raw_sample > 24) {
+        if (!ff_flacdsp_lpc_encode_c_32_overflow_detect(res, smp, n, sub->order,
+                                                        sub->coefs, sub->shift)) {
+            /* No coefs found that do not cause integer overflow,
+             * so return a verbatim subframe instead */
+            sub->type = sub->type_code = FLAC_SUBFRAME_VERBATIM;
+            memcpy(res, smp, n * sizeof(int32_t));
+            return subframe_count_exact(s, sub, 0);
+        }
+    } else if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(sub->order) <= 32) {
         s->flac_dsp.lpc16_encode(res, smp, n, sub->order, sub->coefs, sub->shift);
     } else {
         s->flac_dsp.lpc32_encode(res, smp, n, sub->order, sub->coefs, sub->shift);
@@ -1227,12 +1272,22 @@  static void write_subframes(FlacEncodeContext *s)
         if (sub->type == FLAC_SUBFRAME_CONSTANT) {
             put_sbits(&s->pb, sub->obits, res[0]);
         } else if (sub->type == FLAC_SUBFRAME_VERBATIM) {
-            while (res < frame_end)
-                put_sbits(&s->pb, sub->obits, *res++);
+            if (sub->obits < 32) {
+                while (res < frame_end)
+                    put_sbits(&s->pb, sub->obits, *res++);
+            } else {
+                while (res < frame_end)
+                    put_bits32(&s->pb, *res++);
+            }
         } else {
             /* warm-up samples */
-            for (i = 0; i < sub->order; i++)
-                put_sbits(&s->pb, sub->obits, *res++);
+            if (sub->obits < 32) {
+                for (i = 0; i < sub->order; i++)
+                    put_sbits(&s->pb, sub->obits, *res++);
+            }else{
+                for (i = 0; i < sub->order; i++)
+                    put_bits32(&s->pb, *res++);
+            }
 
             /* LPC coefficients */
             if (sub->type == FLAC_SUBFRAME_LPC) {
@@ -1305,7 +1360,7 @@  static int update_md5_sum(FlacEncodeContext *s, const void *samples)
                             (const uint16_t *) samples, buf_size / 2);
         buf = s->md5_buffer;
 #endif
-    } else {
+    } else if (s->avctx->bits_per_raw_sample <= 24) {
         int i;
         const int32_t *samples0 = samples;
         uint8_t *tmp            = s->md5_buffer;
@@ -1315,6 +1370,15 @@  static int update_md5_sum(FlacEncodeContext *s, const void *samples)
             AV_WL24(tmp + 3*i, v);
         }
         buf = s->md5_buffer;
+    } else {
+        /* s->avctx->bits_per_raw_sample <= 32 */
+        int i;
+        const int32_t *samples0 = samples;
+        uint8_t *tmp            = s->md5_buffer;
+
+        for (i = 0; i < s->frame.blocksize * s->channels; i++)
+            AV_WL32(tmp + 4*i, samples0[i]);
+        buf = s->md5_buffer;
     }
     av_md5_update(s->md5ctx, buf, buf_size);