diff mbox series

[FFmpeg-devel,2/2] avcodec/smacker: Optimize constant 16bit audio output

Message ID 20220503163059.26728-2-michael@niedermayer.cc
State Accepted
Commit 08e82e5b572b440f4faf160d2eac923ca47a59f8
Headers show
Series [FFmpeg-devel,1/2] tools/target_dec_fuzzer: Adjust threshold for smacker audio | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Michael Niedermayer May 3, 2022, 4:30 p.m. UTC
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/smacker.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

Comments

Tomas Härdin May 4, 2022, 9:39 a.m. UTC | #1
tis 2022-05-03 klockan 18:30 +0200 skrev Michael Niedermayer:
> 
> +        } else if (stereo) {
> +            val  = 256*values[1] + values[0];
> +            val2 = 256*values[3] + values[2];
> +            for(; i < unp_size; i+=2) {
> +                pred[0] += val;
> +                pred[1] += val2;
> +                *samples++ = pred[0];
> +                *samples++ = pred[1];
> +            }
> +        } else {
> +            val = 256*values[1] + values[0];
> +            for(; i < unp_size; i++) {
> +                pred[0] += val;
> +                *samples++ = pred[0];
> +            }
> +        }

Got any numbers on how much faster this is? Just out of curiosity

You probably want to follow this up with a reindent patch, since the existing loop is now nested inside the new if-block without being reindented.

/Tomas
Michael Niedermayer July 6, 2022, 5:11 p.m. UTC | #2
On Wed, May 04, 2022 at 11:39:54AM +0200, Tomas Härdin wrote:
> tis 2022-05-03 klockan 18:30 +0200 skrev Michael Niedermayer:
> > 
> > +        } else if (stereo) {
> > +            val  = 256*values[1] + values[0];
> > +            val2 = 256*values[3] + values[2];
> > +            for(; i < unp_size; i+=2) {
> > +                pred[0] += val;
> > +                pred[1] += val2;
> > +                *samples++ = pred[0];
> > +                *samples++ = pred[1];
> > +            }
> > +        } else {
> > +            val = 256*values[1] + values[0];
> > +            for(; i < unp_size; i++) {
> > +                pred[0] += val;
> > +                *samples++ = pred[0];
> > +            }
> > +        }
> 
> Got any numbers on how much faster this is? Just out of curiosity

With the fuzzed sample:
before:
3263902379 decicycles in ABBB,     128 runs,      0 skips

after:
398977744 decicycles in ABBB,    1024 runs,      0 skips

The "before" measurement times out after 128 runs, which is why the run counts differ.

thx

[...]
Paul B Mahol July 6, 2022, 5:18 p.m. UTC | #3
lgtm
Tomas Härdin July 6, 2022, 6:26 p.m. UTC | #4
ons 2022-07-06 klockan 19:11 +0200 skrev Michael Niedermayer:
> On Wed, May 04, 2022 at 11:39:54AM +0200, Tomas Härdin wrote:
> > tis 2022-05-03 klockan 18:30 +0200 skrev Michael Niedermayer:
> > > 
> > > +        } else if (stereo) {
> > > +            val  = 256*values[1] + values[0];
> > > +            val2 = 256*values[3] + values[2];
> > > +            for(; i < unp_size; i+=2) {
> > > +                pred[0] += val;
> > > +                pred[1] += val2;
> > > +                *samples++ = pred[0];
> > > +                *samples++ = pred[1];
> > > +            }
> > > +        } else {
> > > +            val = 256*values[1] + values[0];
> > > +            for(; i < unp_size; i++) {
> > > +                pred[0] += val;
> > > +                *samples++ = pred[0];
> > > +            }
> > > +        }
> > 
> > Got any numbers on how much faster this is? Just out of curiosity
> 
> With the fuzzed sample:
> before:
> 3263902379 decicycles in ABBB,     128 runs,      0 skips
> 
> after:
> 398977744 decicycles in ABBB,    1024 runs,      0 skips
> 
> the first times out after 128 runs which is why the runs differ

Cool. Well, looks good

/Tomas
Michael Niedermayer July 7, 2022, 6:24 p.m. UTC | #5
On Wed, Jul 06, 2022 at 07:18:45PM +0200, Paul B Mahol wrote:
> lgtm

will apply

thx

[...]
diff mbox series

Patch

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 179c70f1ee..5d94a54179 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -601,7 +601,7 @@  static int smka_decode_frame(AVCodecContext *avctx, AVFrame *frame,
     int i, res, ret;
     int unp_size;
     int bits, stereo;
-    unsigned pred[2], val;
+    unsigned pred[2], val, val2;
 
     if (buf_size <= 4) {
         av_log(avctx, AV_LOG_ERROR, "packet is too small\n");
@@ -672,7 +672,11 @@  static int smka_decode_frame(AVCodecContext *avctx, AVFrame *frame,
             pred[i] = av_bswap16(get_bits(&gb, 16));
         for(i = 0; i <= stereo; i++)
             *samples++ = pred[i];
-        for(; i < unp_size / 2; i++) {
+        unp_size /= 2;
+
+        if (vlc[0       ].table || vlc[         1].table ||
+            vlc[2*stereo].table || vlc[2*stereo+1].table) {
+        for(; i < unp_size ; i++) {
             unsigned idx = 2 * (i & stereo);
             if (get_bits_left(&gb) < 0) {
                 ret = AVERROR_INVALIDDATA;
@@ -691,6 +695,22 @@  static int smka_decode_frame(AVCodecContext *avctx, AVFrame *frame,
             pred[idx / 2] += val;
             *samples++ = pred[idx / 2];
         }
+        } else if (stereo) {
+            val  = 256*values[1] + values[0];
+            val2 = 256*values[3] + values[2];
+            for(; i < unp_size; i+=2) {
+                pred[0] += val;
+                pred[1] += val2;
+                *samples++ = pred[0];
+                *samples++ = pred[1];
+            }
+        } else {
+            val = 256*values[1] + values[0];
+            for(; i < unp_size; i++) {
+                pred[0] += val;
+                *samples++ = pred[0];
+            }
+        }
     } else { //8-bit data
         for(i = stereo; i >= 0; i--)
             pred[i] = get_bits(&gb, 8);