diff mbox series

[FFmpeg-devel,5/5] avcodec/cfhddata: Reduce stack usage

Message ID AS8P250MB0744D466DE062DB6D899FB478F7D9@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM
State Accepted
Commit e6d89d0efd9194198d9964e47bf428be222594ea
Headers show
Series [FFmpeg-devel,1/5] avcodec/cfhd, cfhddata: Simplify check for escape | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt Sept. 3, 2022, 8:35 p.m. UTC
Creating CFHD RL VLC tables works by first extending
the codes by the sign, followed by creating a VLC,
followed by deriving the RL VLC from this VLC (which
is then discarded). Extending the codes uses stack arrays.

The tables used to initialize the VLC are already sorted
from left-to-right in the tree. This means that the
corresponding VLC entries are generally also ascending,
but not always: Entries from subtables always follow
the corresponding main table although it is possible
for the right-most node to fit into the main table.

This suggests that one can try to use the final destination
buffer as scratch buffer for the tables with sign included.
Unfortunately it works for neither of the tables if one
uses the right-most part of the RL VLC buffer as scratch buffer;
using the left-most part of the RL VLC buffer as scratch buffer
might work if one traverses the VLC entries from end to start.
But it works only for the little RL VLC (table 9), not for table 18.

Therefore this patch uses the RL VLC buffer for table 9
as scratch buffer for creating the bigger table 18.
Afterwards the left part of the buffer for table 9 is
used as scratch buffer to create table 9.

This fixes the cfhd part of ticket #9399 (if it is not already fixed).
Notice that I do not consider the previous stack usage excessive.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
I actually regard #9399 as a toolchain issue and not as a reason
to pessimize the code for all the other arches/toolchains
where it works.

 libavcodec/cfhddata.c | 47 +++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

Comments

Paul B Mahol Sept. 3, 2022, 9:49 p.m. UTC | #1
FATE does not cover the (old) codebook 9, so make sure it is still working.
Andreas Rheinhardt Sept. 3, 2022, 9:56 p.m. UTC | #2
Paul B Mahol:
> The FATE does not cover 9 (old) codebook, so make sure it is still working.
> 

Its CRC checksum didn't change in patches 2-5.

- Andreas
Paul B Mahol Sept. 5, 2022, 10:25 a.m. UTC | #3
On Sat, Sep 3, 2022 at 11:56 PM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> Paul B Mahol:
> > The FATE does not cover 9 (old) codebook, so make sure it is still
> working.
> >
>
> Its CRC checksum didn't change in patches 2-5.
>

Set LGTM.


>
> - Andreas
>
diff mbox series

Patch

diff --git a/libavcodec/cfhddata.c b/libavcodec/cfhddata.c
index efe932dc3b..fd5cc8174e 100644
--- a/libavcodec/cfhddata.c
+++ b/libavcodec/cfhddata.c
@@ -127,11 +127,8 @@  static const CFHD_RL_ELEM table_18_vlc[NB_VLC_TABLE_18] = {
 
 static av_cold int cfhd_init_vlc(CFHD_RL_VLC_ELEM out[], unsigned out_size,
                                  const CFHD_RL_ELEM table_vlc[], unsigned table_size,
-                                 void *logctx)
+                                 CFHD_RL_VLC_ELEM tmp[], void *logctx)
 {
-    uint8_t  new_cfhd_vlc_len[NB_VLC_TABLE_18 * 2];
-    uint16_t new_cfhd_vlc_run[NB_VLC_TABLE_18 * 2];
-    int16_t  new_cfhd_vlc_level[NB_VLC_TABLE_18 * 2];
     VLC vlc;
     unsigned j;
     int ret;
@@ -139,27 +136,28 @@  static av_cold int cfhd_init_vlc(CFHD_RL_VLC_ELEM out[], unsigned out_size,
     /** Similar to dv.c, generate signed VLC tables **/
 
     for (unsigned i = j = 0; i < table_size; i++, j++) {
-        new_cfhd_vlc_len[j]   = table_vlc[i].len;
-        new_cfhd_vlc_run[j]   = table_vlc[i].run;
-        new_cfhd_vlc_level[j] = table_vlc[i].level;
+        tmp[j].len   = table_vlc[i].len;
+        tmp[j].run   = table_vlc[i].run;
+        tmp[j].level = table_vlc[i].level;
 
         /* Don't include the zero level nor escape bits */
         if (table_vlc[i].level && table_vlc[i].run) {
-            new_cfhd_vlc_len[j]++;
+            tmp[j].len++;
             j++;
-            new_cfhd_vlc_len[j]   =  table_vlc[i].len + 1;
-            new_cfhd_vlc_run[j]   =  table_vlc[i].run;
-            new_cfhd_vlc_level[j] = -table_vlc[i].level;
+            tmp[j].len   =  table_vlc[i].len + 1;
+            tmp[j].run   =  table_vlc[i].run;
+            tmp[j].level = -table_vlc[i].level;
         }
     }
 
-    ret = ff_init_vlc_from_lengths(&vlc, VLC_BITS, j, new_cfhd_vlc_len,
-                                   1, NULL, 0, 0, 0, 0, logctx);
+    ret = ff_init_vlc_from_lengths(&vlc, VLC_BITS, j,
+                                   &tmp[0].len, sizeof(tmp[0]),
+                                   NULL, 0, 0, 0, 0, logctx);
     if (ret < 0)
         return ret;
     av_assert0(vlc.table_size == out_size);
 
-    for (unsigned i = 0; i < out_size; i++) {
+    for (unsigned i = out_size; i-- > 0;) {
         int code = vlc.table[i].sym;
         int len  = vlc.table[i].len;
         int level, run;
@@ -168,8 +166,8 @@  static av_cold int cfhd_init_vlc(CFHD_RL_VLC_ELEM out[], unsigned out_size,
             run   = 0;
             level = code;
         } else {
-            run   = new_cfhd_vlc_run[code];
-            level = new_cfhd_vlc_level[code];
+            run   = tmp[code].run;
+            level = tmp[code].level;
         }
         out[i].len   = len;
         out[i].level = level;
@@ -184,16 +182,17 @@  av_cold int ff_cfhd_init_vlcs(CFHDContext *s)
 {
     int ret;
 
-    /* Table 9 */
-    ret = cfhd_init_vlc(s->table_9_rl_vlc, FF_ARRAY_ELEMS(s->table_9_rl_vlc),
-                        table_9_vlc,       FF_ARRAY_ELEMS(table_9_vlc),
-                        s->avctx);
-    if (ret < 0)
-        return ret;
-    /* Table 18 */
+    /* Table 18 - we reuse the unused table_9_rl_vlc as scratch buffer here */
     ret = cfhd_init_vlc(s->table_18_rl_vlc, FF_ARRAY_ELEMS(s->table_18_rl_vlc),
                         table_18_vlc,       FF_ARRAY_ELEMS(table_18_vlc),
-                        s->avctx);
+                        s->table_9_rl_vlc, s->avctx);
+    if (ret < 0)
+        return ret;
+    /* Table 9 - table_9_rl_vlc itself is used as scratch buffer; it works
+     * because we are counting down in the final loop */
+    ret = cfhd_init_vlc(s->table_9_rl_vlc, FF_ARRAY_ELEMS(s->table_9_rl_vlc),
+                        table_9_vlc,       FF_ARRAY_ELEMS(table_9_vlc),
+                        s->table_9_rl_vlc, s->avctx);
     if (ret < 0)
         return ret;
     return 0;