diff mbox

[FFmpeg-devel,1/3] avutil: add staticpool

Message ID 20180120042915.9161-1-mfcc64@gmail.com
State New
Headers show

Commit Message

Muhammad Faiz Jan. 20, 2018, 4:29 a.m. UTC
Help avoiding malloc-free cycles when allocating-freeing common
structures.

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
---
 libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 libavutil/staticpool.h

Comments

James Almer Jan. 20, 2018, 4:49 a.m. UTC | #1
On 1/20/2018 1:29 AM, Muhammad Faiz wrote:
> Help avoiding malloc-free cycles when allocating-freeing common
> structures.
> 
> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
> ---
>  libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 117 insertions(+)
>  create mode 100644 libavutil/staticpool.h
> 
> diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
> new file mode 100644
> index 0000000000..9c9b2784bc
> --- /dev/null
> +++ b/libavutil/staticpool.h
> @@ -0,0 +1,117 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_STATICPOOL_H
> +#define AVUTIL_STATICPOOL_H
> +
> +#include <stdatomic.h>
> +#include "avassert.h"
> +#include "mem.h"
> +
> +/**
> + * FF_STATICPOOL allocate memory without av_malloc if possible
> + * @param size must be 2^n between 64 and 4096
> + */
> +#define FF_STATICPOOL_DECLARE(type, size)                                       \
> +typedef struct type##_StaticPoolWrapper {                                       \
> +    type        buf;                                                            \
> +    unsigned    index;                                                          \
> +    atomic_uint next;                                                           \
> +} type##_StaticPoolWrapper;                                                     \
> +                                                                                \
> +static atomic_uint              type##_staticpool_next;                         \
> +static atomic_uint              type##_staticpool_last;                         \
> +static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
> +                                                                                \
> +static type *type##_staticpool_malloc(void)                                     \
> +{                                                                               \
> +    unsigned val, index, serial, new_val;                                       \
> +                                                                                \
> +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
> +                                                                                \
> +    /* use serial, avoid spinlock */                                            \
> +    /* acquire, so we don't get stalled table[index].next */                    \
> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
> +    do {                                                                        \
> +        index  = val & ((size) - 1);                                            \
> +        serial = val & ~((size) - 1);                                           \
> +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \

The wrappers for atomic_compare_exchange_* in the compat folder are not
really working and fixing them is supposedly not trivial, so this will
only work with GCC and Clang but not with for example MSVC or SunCC.

Can you implement this using mutexes instead, or otherwise avoid using
atomic_compare_exchange_*? You can even use static mutex initialization
now for all targets, and not just native pthreads.

> +                                                      memory_order_acquire, memory_order_acquire)); \
> +                                                                                \
> +    index = val & ((size) - 1);                                                 \
> +    if (index)                                                                  \
> +        return &type##_staticpool_table[index].buf;                             \
> +                                                                                \
> +    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1, memory_order_relaxed) + 1; \
> +    if (index < (size)) {                                                       \
> +        type##_staticpool_table[index].index = index;                           \
> +        return &type##_staticpool_table[index].buf;                             \
> +    }                                                                           \
> +                                                                                \
> +    atomic_fetch_add_explicit(&type##_staticpool_last, -1, memory_order_relaxed); \
> +    return av_malloc(sizeof(type));                                             \
> +}                                                                               \
> +                                                                                \
> +static inline type *type##_staticpool_mallocz(void)                             \
> +{                                                                               \
> +    type *ptr = type##_staticpool_malloc();                                     \
> +    if (ptr)                                                                    \
> +        memset(ptr, 0, sizeof(*ptr));                                           \
> +    return ptr;                                                                 \
> +}                                                                               \
> +                                                                                \
> +static void type##_staticpool_free(type *ptr)                                   \
> +{                                                                               \
> +    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *) ptr;         \
> +    unsigned val, serial, index, new_val;                                       \
> +                                                                                \
> +    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||               \
> +        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table + size)) {        \
> +        av_free(ptr);                                                           \
> +        return;                                                                 \
> +    }                                                                           \
> +                                                                                \
> +    if (CONFIG_MEMORY_POISONING)                                                \
> +        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));              \
> +                                                                                \
> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_relaxed);  \
> +    do {                                                                        \
> +        index  = val & ((size) - 1);                                            \
> +        serial = val & ~((size) - 1);                                           \
> +        atomic_store_explicit(&entry->next, index, memory_order_relaxed);       \
> +        new_val = entry->index | (serial + (size));                             \
> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
> +                                                      memory_order_release, memory_order_relaxed)); \
> +}                                                                               \
> +                                                                                \
> +static inline void type##_staticpool_freep(type **ptr)                          \
> +{                                                                               \
> +    if (ptr) {                                                                  \
> +        type##_staticpool_free(*ptr);                                           \
> +        *ptr = NULL;                                                            \
> +    }                                                                           \
> +}                                                                               \
> +/* FF_STATICPOOL_DECLARE */
> +
> +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
> +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
> +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
> +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
> +
> +#endif
>
Muhammad Faiz Jan. 20, 2018, 5:52 a.m. UTC | #2
On Sat, Jan 20, 2018 at 11:49 AM, James Almer <jamrial@gmail.com> wrote:
> On 1/20/2018 1:29 AM, Muhammad Faiz wrote:
>> Help avoiding malloc-free cycles when allocating-freeing common
>> structures.
>>
>> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>> ---
>>  libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 117 insertions(+)
>>  create mode 100644 libavutil/staticpool.h
>>
>> diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
>> new file mode 100644
>> index 0000000000..9c9b2784bc
>> --- /dev/null
>> +++ b/libavutil/staticpool.h
>> @@ -0,0 +1,117 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVUTIL_STATICPOOL_H
>> +#define AVUTIL_STATICPOOL_H
>> +
>> +#include <stdatomic.h>
>> +#include "avassert.h"
>> +#include "mem.h"
>> +
>> +/**
>> + * FF_STATICPOOL allocate memory without av_malloc if possible
>> + * @param size must be 2^n between 64 and 4096
>> + */
>> +#define FF_STATICPOOL_DECLARE(type, size)                                       \
>> +typedef struct type##_StaticPoolWrapper {                                       \
>> +    type        buf;                                                            \
>> +    unsigned    index;                                                          \
>> +    atomic_uint next;                                                           \
>> +} type##_StaticPoolWrapper;                                                     \
>> +                                                                                \
>> +static atomic_uint              type##_staticpool_next;                         \
>> +static atomic_uint              type##_staticpool_last;                         \
>> +static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
>> +                                                                                \
>> +static type *type##_staticpool_malloc(void)                                     \
>> +{                                                                               \
>> +    unsigned val, index, serial, new_val;                                       \
>> +                                                                                \
>> +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
>> +                                                                                \
>> +    /* use serial, avoid spinlock */                                            \
>> +    /* acquire, so we don't get stalled table[index].next */                    \
>> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
>> +    do {                                                                        \
>> +        index  = val & ((size) - 1);                                            \
>> +        serial = val & ~((size) - 1);                                           \
>> +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
>> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
>
> The wrappers for atomic_compare_exchange_* in the compat folder are not
> really working and fixing them is supposedly not trivial, so this will
> only work with GCC and Clang but not with for example MSVC or SunCC.

What's the problem? I only see that stdatomic compat make typedef
every atomic type to intptr_t, so atomic_*64_t won't work if
sizeof(intptr_t) == 32.
Here I use atomic_uint, so I guess it will work.

Note that if atomic_compare_exchange_* is broken then atomic_fetch_*
will also be broken because atomic_fetch_* call
atomic_compare_exchange_* on suncc compat.

>
> Can you implement this using mutexes instead, or otherwise avoid using
> atomic_compare_exchange_*? You can even use static mutex initialization
> now for all targets, and not just native pthreads.

Using mutex makes implementation slower.
Using atomic_exchange requires spin lock.
wm4 Jan. 20, 2018, 10:18 a.m. UTC | #3
On Sat, 20 Jan 2018 12:52:46 +0700
Muhammad Faiz <mfcc64@gmail.com> wrote:

> On Sat, Jan 20, 2018 at 11:49 AM, James Almer <jamrial@gmail.com> wrote:
> > On 1/20/2018 1:29 AM, Muhammad Faiz wrote:  
> >> Help avoiding malloc-free cycles when allocating-freeing common
> >> structures.
> >>
> >> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
> >> ---
> >>  libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 117 insertions(+)
> >>  create mode 100644 libavutil/staticpool.h
> >>
> >> diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
> >> new file mode 100644
> >> index 0000000000..9c9b2784bc
> >> --- /dev/null
> >> +++ b/libavutil/staticpool.h
> >> @@ -0,0 +1,117 @@
> >> +/*
> >> + * This file is part of FFmpeg.
> >> + *
> >> + * FFmpeg is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU Lesser General Public
> >> + * License as published by the Free Software Foundation; either
> >> + * version 2.1 of the License, or (at your option) any later version.
> >> + *
> >> + * FFmpeg is distributed in the hope that it will be useful,
> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> + * Lesser General Public License for more details.
> >> + *
> >> + * You should have received a copy of the GNU Lesser General Public
> >> + * License along with FFmpeg; if not, write to the Free Software
> >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> >> + */
> >> +
> >> +#ifndef AVUTIL_STATICPOOL_H
> >> +#define AVUTIL_STATICPOOL_H
> >> +
> >> +#include <stdatomic.h>
> >> +#include "avassert.h"
> >> +#include "mem.h"
> >> +
> >> +/**
> >> + * FF_STATICPOOL allocate memory without av_malloc if possible
> >> + * @param size must be 2^n between 64 and 4096
> >> + */
> >> +#define FF_STATICPOOL_DECLARE(type, size)                                       \
> >> +typedef struct type##_StaticPoolWrapper {                                       \
> >> +    type        buf;                                                            \
> >> +    unsigned    index;                                                          \
> >> +    atomic_uint next;                                                           \
> >> +} type##_StaticPoolWrapper;                                                     \
> >> +                                                                                \
> >> +static atomic_uint              type##_staticpool_next;                         \
> >> +static atomic_uint              type##_staticpool_last;                         \
> >> +static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
> >> +                                                                                \
> >> +static type *type##_staticpool_malloc(void)                                     \
> >> +{                                                                               \
> >> +    unsigned val, index, serial, new_val;                                       \
> >> +                                                                                \
> >> +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
> >> +                                                                                \
> >> +    /* use serial, avoid spinlock */                                            \
> >> +    /* acquire, so we don't get stalled table[index].next */                    \
> >> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
> >> +    do {                                                                        \
> >> +        index  = val & ((size) - 1);                                            \
> >> +        serial = val & ~((size) - 1);                                           \
> >> +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
> >> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \  
> >
> > The wrappers for atomic_compare_exchange_* in the compat folder are not
> > really working and fixing them is supposedly not trivial, so this will
> > only work with GCC and Clang but not with for example MSVC or SunCC.  
> 
> What's the problem? I only see that stdatomic compat make typedef
> every atomic type to intptr_t, so atomic_*64_t won't work if
> sizeof(intptr_t) == 32.
> Here I use atomic_uint, so I guess it will work.
> 
> Note that if atomic_compare_exchange_* is broken then atomic_fetch_*
> will also be broken because atomic_fetch_* call
> atomic_compare_exchange_* on suncc compat.
> 
> >
> > Can you implement this using mutexes instead, or otherwise avoid using
> > atomic_compare_exchange_*? You can even use static mutex initialization
> > now for all targets, and not just native pthreads.  
> 
> Using mutex makes implementation slower.
> Using atomic_exchange requires spin lock.

Uncontended mutexes are spinlocks.
wm4 Jan. 20, 2018, 10:22 a.m. UTC | #4
On Sat, 20 Jan 2018 11:29:13 +0700
Muhammad Faiz <mfcc64@gmail.com> wrote:

> Help avoiding malloc-free cycles when allocating-freeing common
> structures.
> 
> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
> ---
>  libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 117 insertions(+)
>  create mode 100644 libavutil/staticpool.h
> 
> diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
> new file mode 100644
> index 0000000000..9c9b2784bc
> --- /dev/null
> +++ b/libavutil/staticpool.h
> @@ -0,0 +1,117 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_STATICPOOL_H
> +#define AVUTIL_STATICPOOL_H
> +
> +#include <stdatomic.h>
> +#include "avassert.h"
> +#include "mem.h"
> +
> +/**
> + * FF_STATICPOOL allocate memory without av_malloc if possible
> + * @param size must be 2^n between 64 and 4096
> + */
> +#define FF_STATICPOOL_DECLARE(type, size)                                       \
> +typedef struct type##_StaticPoolWrapper {                                       \
> +    type        buf;                                                            \
> +    unsigned    index;                                                          \
> +    atomic_uint next;                                                           \
> +} type##_StaticPoolWrapper;                                                     \
> +                                                                                \
> +static atomic_uint              type##_staticpool_next;                         \
> +static atomic_uint              type##_staticpool_last;                         \
> +static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
> +                                                                                \
> +static type *type##_staticpool_malloc(void)                                     \
> +{                                                                               \
> +    unsigned val, index, serial, new_val;                                       \
> +                                                                                \
> +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
> +                                                                                \
> +    /* use serial, avoid spinlock */                                            \
> +    /* acquire, so we don't get stalled table[index].next */                    \
> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
> +    do {                                                                        \
> +        index  = val & ((size) - 1);                                            \
> +        serial = val & ~((size) - 1);                                           \
> +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
> +                                                      memory_order_acquire, memory_order_acquire)); \
> +                                                                                \
> +    index = val & ((size) - 1);                                                 \
> +    if (index)                                                                  \
> +        return &type##_staticpool_table[index].buf;                             \
> +                                                                                \
> +    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1, memory_order_relaxed) + 1; \
> +    if (index < (size)) {                                                       \
> +        type##_staticpool_table[index].index = index;                           \
> +        return &type##_staticpool_table[index].buf;                             \
> +    }                                                                           \
> +                                                                                \
> +    atomic_fetch_add_explicit(&type##_staticpool_last, -1, memory_order_relaxed); \
> +    return av_malloc(sizeof(type));                                             \
> +}                                                                               \
> +                                                                                \
> +static inline type *type##_staticpool_mallocz(void)                             \
> +{                                                                               \
> +    type *ptr = type##_staticpool_malloc();                                     \
> +    if (ptr)                                                                    \
> +        memset(ptr, 0, sizeof(*ptr));                                           \
> +    return ptr;                                                                 \
> +}                                                                               \
> +                                                                                \
> +static void type##_staticpool_free(type *ptr)                                   \
> +{                                                                               \
> +    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *) ptr;         \
> +    unsigned val, serial, index, new_val;                                       \
> +                                                                                \
> +    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||               \
> +        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table + size)) {        \
> +        av_free(ptr);                                                           \
> +        return;                                                                 \
> +    }                                                                           \
> +                                                                                \
> +    if (CONFIG_MEMORY_POISONING)                                                \
> +        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));              \
> +                                                                                \
> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_relaxed);  \
> +    do {                                                                        \
> +        index  = val & ((size) - 1);                                            \
> +        serial = val & ~((size) - 1);                                           \
> +        atomic_store_explicit(&entry->next, index, memory_order_relaxed);       \
> +        new_val = entry->index | (serial + (size));                             \
> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
> +                                                      memory_order_release, memory_order_relaxed)); \
> +}                                                                               \
> +                                                                                \
> +static inline void type##_staticpool_freep(type **ptr)                          \
> +{                                                                               \
> +    if (ptr) {                                                                  \
> +        type##_staticpool_free(*ptr);                                           \
> +        *ptr = NULL;                                                            \
> +    }                                                                           \
> +}                                                                               \
> +/* FF_STATICPOOL_DECLARE */
> +
> +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
> +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
> +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
> +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
> +
> +#endif

I'm against this. Global mutable data is always bad. If you have 2
uses ffmpeg in the same process, their efficiency might get halved
because the available pool size is shared by both. The macro mess is
very ugly. Can we please not define pages of code as macros. Last but
not least this is the job of the memory manager, which already does
fancy things like per thread pools. I don't trust the atomics use
either, I'm don't want to have to debug that ever.
Hendrik Leppkes Jan. 20, 2018, 10:26 a.m. UTC | #5
On Sat, Jan 20, 2018 at 11:22 AM, wm4 <nfxjfg@googlemail.com> wrote:
> On Sat, 20 Jan 2018 11:29:13 +0700
> Muhammad Faiz <mfcc64@gmail.com> wrote:
>
>> Help avoiding malloc-free cycles when allocating-freeing common
>> structures.
>>
>> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>> ---
>>  libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 117 insertions(+)
>>  create mode 100644 libavutil/staticpool.h
>>
>> diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
>> new file mode 100644
>> index 0000000000..9c9b2784bc
>> --- /dev/null
>> +++ b/libavutil/staticpool.h
>> @@ -0,0 +1,117 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVUTIL_STATICPOOL_H
>> +#define AVUTIL_STATICPOOL_H
>> +
>> +#include <stdatomic.h>
>> +#include "avassert.h"
>> +#include "mem.h"
>> +
>> +/**
>> + * FF_STATICPOOL allocate memory without av_malloc if possible
>> + * @param size must be 2^n between 64 and 4096
>> + */
>> +#define FF_STATICPOOL_DECLARE(type, size)                                       \
>> +typedef struct type##_StaticPoolWrapper {                                       \
>> +    type        buf;                                                            \
>> +    unsigned    index;                                                          \
>> +    atomic_uint next;                                                           \
>> +} type##_StaticPoolWrapper;                                                     \
>> +                                                                                \
>> +static atomic_uint              type##_staticpool_next;                         \
>> +static atomic_uint              type##_staticpool_last;                         \
>> +static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
>> +                                                                                \
>> +static type *type##_staticpool_malloc(void)                                     \
>> +{                                                                               \
>> +    unsigned val, index, serial, new_val;                                       \
>> +                                                                                \
>> +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
>> +                                                                                \
>> +    /* use serial, avoid spinlock */                                            \
>> +    /* acquire, so we don't get stalled table[index].next */                    \
>> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
>> +    do {                                                                        \
>> +        index  = val & ((size) - 1);                                            \
>> +        serial = val & ~((size) - 1);                                           \
>> +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
>> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
>> +                                                      memory_order_acquire, memory_order_acquire)); \
>> +                                                                                \
>> +    index = val & ((size) - 1);                                                 \
>> +    if (index)                                                                  \
>> +        return &type##_staticpool_table[index].buf;                             \
>> +                                                                                \
>> +    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1, memory_order_relaxed) + 1; \
>> +    if (index < (size)) {                                                       \
>> +        type##_staticpool_table[index].index = index;                           \
>> +        return &type##_staticpool_table[index].buf;                             \
>> +    }                                                                           \
>> +                                                                                \
>> +    atomic_fetch_add_explicit(&type##_staticpool_last, -1, memory_order_relaxed); \
>> +    return av_malloc(sizeof(type));                                             \
>> +}                                                                               \
>> +                                                                                \
>> +static inline type *type##_staticpool_mallocz(void)                             \
>> +{                                                                               \
>> +    type *ptr = type##_staticpool_malloc();                                     \
>> +    if (ptr)                                                                    \
>> +        memset(ptr, 0, sizeof(*ptr));                                           \
>> +    return ptr;                                                                 \
>> +}                                                                               \
>> +                                                                                \
>> +static void type##_staticpool_free(type *ptr)                                   \
>> +{                                                                               \
>> +    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *) ptr;         \
>> +    unsigned val, serial, index, new_val;                                       \
>> +                                                                                \
>> +    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||               \
>> +        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table + size)) {        \
>> +        av_free(ptr);                                                           \
>> +        return;                                                                 \
>> +    }                                                                           \
>> +                                                                                \
>> +    if (CONFIG_MEMORY_POISONING)                                                \
>> +        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));              \
>> +                                                                                \
>> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_relaxed);  \
>> +    do {                                                                        \
>> +        index  = val & ((size) - 1);                                            \
>> +        serial = val & ~((size) - 1);                                           \
>> +        atomic_store_explicit(&entry->next, index, memory_order_relaxed);       \
>> +        new_val = entry->index | (serial + (size));                             \
>> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
>> +                                                      memory_order_release, memory_order_relaxed)); \
>> +}                                                                               \
>> +                                                                                \
>> +static inline void type##_staticpool_freep(type **ptr)                          \
>> +{                                                                               \
>> +    if (ptr) {                                                                  \
>> +        type##_staticpool_free(*ptr);                                           \
>> +        *ptr = NULL;                                                            \
>> +    }                                                                           \
>> +}                                                                               \
>> +/* FF_STATICPOOL_DECLARE */
>> +
>> +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
>> +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
>> +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
>> +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
>> +
>> +#endif
>
> I'm against this. Global mutable data is always bad. If you have 2
> uses ffmpeg in the same process, their efficiency might get halved
> because the available pool size is shared by both. The macro mess is
> very ugly. Can we please not define pages of code as macros. Last but
> not least this is the job of the memory manager, which already does
> fancy things like per thread pools. I don't trust the atomics use
> either, I'm don't want to have to debug that ever.
>

I generally agree on the global state. We should not be writing new
global state into our libraries, we're working on getting rid of that,
not introduce more.

- Hendrik
Rostislav Pehlivanov Jan. 20, 2018, 11:13 a.m. UTC | #6
On 20 January 2018 at 10:22, wm4 <nfxjfg@googlemail.com> wrote:

> On Sat, 20 Jan 2018 11:29:13 +0700
> Muhammad Faiz <mfcc64@gmail.com> wrote:
>
> > Help avoiding malloc-free cycles when allocating-freeing common
> > structures.
> >
> > Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
> > ---
> >  libavutil/staticpool.h | 117 ++++++++++++++++++++++++++++++
> +++++++++++++++++++
> >  1 file changed, 117 insertions(+)
> >  create mode 100644 libavutil/staticpool.h
> >
> > diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
> > new file mode 100644
> > index 0000000000..9c9b2784bc
> > --- /dev/null
> > +++ b/libavutil/staticpool.h
> > @@ -0,0 +1,117 @@
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#ifndef AVUTIL_STATICPOOL_H
> > +#define AVUTIL_STATICPOOL_H
> > +
> > +#include <stdatomic.h>
> > +#include "avassert.h"
> > +#include "mem.h"
> > +
> > +/**
> > + * FF_STATICPOOL allocate memory without av_malloc if possible
> > + * @param size must be 2^n between 64 and 4096
> > + */
> > +#define FF_STATICPOOL_DECLARE(type, size)
>          \
> > +typedef struct type##_StaticPoolWrapper {
>          \
> > +    type        buf;
>         \
> > +    unsigned    index;
>         \
> > +    atomic_uint next;
>          \
> > +} type##_StaticPoolWrapper;
>          \
> > +
>         \
> > +static atomic_uint              type##_staticpool_next;
>          \
> > +static atomic_uint              type##_staticpool_last;
>          \
> > +static type##_StaticPoolWrapper type##_staticpool_table[size];
>         \
> > +
>         \
> > +static type *type##_staticpool_malloc(void)
>          \
> > +{
>          \
> > +    unsigned val, index, serial, new_val;
>          \
> > +
>         \
> > +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) -
> 1)));     \
> > +
>         \
> > +    /* use serial, avoid spinlock */
>         \
> > +    /* acquire, so we don't get stalled table[index].next */
>         \
> > +    val = atomic_load_explicit(&type##_staticpool_next,
> memory_order_acquire);  \
> > +    do {
>         \
> > +        index  = val & ((size) - 1);
>         \
> > +        serial = val & ~((size) - 1);
>          \
> > +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next,
> memory_order_relaxed) | (serial + (size)); \
> > +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next,
> &val, new_val, \
> > +
> memory_order_acquire, memory_order_acquire)); \
> > +
>         \
> > +    index = val & ((size) - 1);
>          \
> > +    if (index)
>         \
> > +        return &type##_staticpool_table[index].buf;
>          \
> > +
>         \
> > +    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1,
> memory_order_relaxed) + 1; \
> > +    if (index < (size)) {
>          \
> > +        type##_staticpool_table[index].index = index;
>          \
> > +        return &type##_staticpool_table[index].buf;
>          \
> > +    }
>          \
> > +
>         \
> > +    atomic_fetch_add_explicit(&type##_staticpool_last, -1,
> memory_order_relaxed); \
> > +    return av_malloc(sizeof(type));
>          \
> > +}
>          \
> > +
>         \
> > +static inline type *type##_staticpool_mallocz(void)
>          \
> > +{
>          \
> > +    type *ptr = type##_staticpool_malloc();
>          \
> > +    if (ptr)
>         \
> > +        memset(ptr, 0, sizeof(*ptr));
>          \
> > +    return ptr;
>          \
> > +}
>          \
> > +
>         \
> > +static void type##_staticpool_free(type *ptr)
>          \
> > +{
>          \
> > +    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *)
> ptr;         \
> > +    unsigned val, serial, index, new_val;
>          \
> > +
>         \
> > +    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||
>          \
> > +        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table + size))
> {        \
> > +        av_free(ptr);
>          \
> > +        return;
>          \
> > +    }
>          \
> > +
>         \
> > +    if (CONFIG_MEMORY_POISONING)
>         \
> > +        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));
>         \
> > +
>         \
> > +    val = atomic_load_explicit(&type##_staticpool_next,
> memory_order_relaxed);  \
> > +    do {
>         \
> > +        index  = val & ((size) - 1);
>         \
> > +        serial = val & ~((size) - 1);
>          \
> > +        atomic_store_explicit(&entry->next, index,
> memory_order_relaxed);       \
> > +        new_val = entry->index | (serial + (size));
>          \
> > +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next,
> &val, new_val, \
> > +
> memory_order_release, memory_order_relaxed)); \
> > +}
>          \
> > +
>         \
> > +static inline void type##_staticpool_freep(type **ptr)
>         \
> > +{
>          \
> > +    if (ptr) {
>         \
> > +        type##_staticpool_free(*ptr);
>          \
> > +        *ptr = NULL;
>         \
> > +    }
>          \
> > +}
>          \
> > +/* FF_STATICPOOL_DECLARE */
> > +
> > +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
> > +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
> > +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
> > +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
> > +
> > +#endif
>
> I'm against this. Global mutable data is always bad. If you have 2
> uses ffmpeg in the same process, their efficiency might get halved
> because the available pool size is shared by both. The macro mess is
> very ugly. Can we please not define pages of code as macros. Last but
> not least this is the job of the memory manager, which already does
> fancy things like per thread pools. I don't trust the atomics use
> either, I'm don't want to have to debug that ever.
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>


I agree.
Also the last GCC version has multithreaded malloc cache already
implemented so it makes this patch only useful on non-glibc systems (or
systems which don't implement malloc caching).
Rostislav Pehlivanov Jan. 20, 2018, 11:32 a.m. UTC | #7
On 20 January 2018 at 11:13, Rostislav Pehlivanov <atomnuker@gmail.com>
wrote:

>
>
> On 20 January 2018 at 10:22, wm4 <nfxjfg@googlemail.com> wrote:
>
>> On Sat, 20 Jan 2018 11:29:13 +0700
>> Muhammad Faiz <mfcc64@gmail.com> wrote:
>>
>> > Help avoiding malloc-free cycles when allocating-freeing common
>> > structures.
>> >
>> > Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>> > ---
>> >  libavutil/staticpool.h | 117 ++++++++++++++++++++++++++++++
>> +++++++++++++++++++
>> >  1 file changed, 117 insertions(+)
>> >  create mode 100644 libavutil/staticpool.h
>> >
>> > diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
>> > new file mode 100644
>> > index 0000000000..9c9b2784bc
>> > --- /dev/null
>> > +++ b/libavutil/staticpool.h
>> > @@ -0,0 +1,117 @@
>> > +/*
>> > + * This file is part of FFmpeg.
>> > + *
>> > + * FFmpeg is free software; you can redistribute it and/or
>> > + * modify it under the terms of the GNU Lesser General Public
>> > + * License as published by the Free Software Foundation; either
>> > + * version 2.1 of the License, or (at your option) any later version.
>> > + *
>> > + * FFmpeg is distributed in the hope that it will be useful,
>> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > + * Lesser General Public License for more details.
>> > + *
>> > + * You should have received a copy of the GNU Lesser General Public
>> > + * License along with FFmpeg; if not, write to the Free Software
>> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> 02110-1301 USA
>> > + */
>> > +
>> > +#ifndef AVUTIL_STATICPOOL_H
>> > +#define AVUTIL_STATICPOOL_H
>> > +
>> > +#include <stdatomic.h>
>> > +#include "avassert.h"
>> > +#include "mem.h"
>> > +
>> > +/**
>> > + * FF_STATICPOOL allocate memory without av_malloc if possible
>> > + * @param size must be 2^n between 64 and 4096
>> > + */
>> > +#define FF_STATICPOOL_DECLARE(type, size)
>>          \
>> > +typedef struct type##_StaticPoolWrapper {
>>          \
>> > +    type        buf;
>>           \
>> > +    unsigned    index;
>>           \
>> > +    atomic_uint next;
>>          \
>> > +} type##_StaticPoolWrapper;
>>          \
>> > +
>>           \
>> > +static atomic_uint              type##_staticpool_next;
>>          \
>> > +static atomic_uint              type##_staticpool_last;
>>          \
>> > +static type##_StaticPoolWrapper type##_staticpool_table[size];
>>           \
>> > +
>>           \
>> > +static type *type##_staticpool_malloc(void)
>>            \
>> > +{
>>          \
>> > +    unsigned val, index, serial, new_val;
>>          \
>> > +
>>           \
>> > +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) -
>> 1)));     \
>> > +
>>           \
>> > +    /* use serial, avoid spinlock */
>>           \
>> > +    /* acquire, so we don't get stalled table[index].next */
>>           \
>> > +    val = atomic_load_explicit(&type##_staticpool_next,
>> memory_order_acquire);  \
>> > +    do {
>>           \
>> > +        index  = val & ((size) - 1);
>>           \
>> > +        serial = val & ~((size) - 1);
>>          \
>> > +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next,
>> memory_order_relaxed) | (serial + (size)); \
>> > +    } while (!atomic_compare_exchange_stro
>> ng_explicit(&type##_staticpool_next, &val, new_val, \
>> > +
>> memory_order_acquire, memory_order_acquire)); \
>> > +
>>           \
>> > +    index = val & ((size) - 1);
>>          \
>> > +    if (index)
>>           \
>> > +        return &type##_staticpool_table[index].buf;
>>            \
>> > +
>>           \
>> > +    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1,
>> memory_order_relaxed) + 1; \
>> > +    if (index < (size)) {
>>          \
>> > +        type##_staticpool_table[index].index = index;
>>            \
>> > +        return &type##_staticpool_table[index].buf;
>>            \
>> > +    }
>>          \
>> > +
>>           \
>> > +    atomic_fetch_add_explicit(&type##_staticpool_last, -1,
>> memory_order_relaxed); \
>> > +    return av_malloc(sizeof(type));
>>          \
>> > +}
>>          \
>> > +
>>           \
>> > +static inline type *type##_staticpool_mallocz(void)
>>            \
>> > +{
>>          \
>> > +    type *ptr = type##_staticpool_malloc();
>>          \
>> > +    if (ptr)
>>           \
>> > +        memset(ptr, 0, sizeof(*ptr));
>>          \
>> > +    return ptr;
>>          \
>> > +}
>>          \
>> > +
>>           \
>> > +static void type##_staticpool_free(type *ptr)
>>          \
>> > +{
>>          \
>> > +    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *)
>> ptr;         \
>> > +    unsigned val, serial, index, new_val;
>>          \
>> > +
>>           \
>> > +    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||
>>            \
>> > +        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table +
>> size)) {        \
>> > +        av_free(ptr);
>>          \
>> > +        return;
>>          \
>> > +    }
>>          \
>> > +
>>           \
>> > +    if (CONFIG_MEMORY_POISONING)
>>           \
>> > +        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));
>>           \
>> > +
>>           \
>> > +    val = atomic_load_explicit(&type##_staticpool_next,
>> memory_order_relaxed);  \
>> > +    do {
>>           \
>> > +        index  = val & ((size) - 1);
>>           \
>> > +        serial = val & ~((size) - 1);
>>          \
>> > +        atomic_store_explicit(&entry->next, index,
>> memory_order_relaxed);       \
>> > +        new_val = entry->index | (serial + (size));
>>          \
>> > +    } while (!atomic_compare_exchange_stro
>> ng_explicit(&type##_staticpool_next, &val, new_val, \
>> > +
>> memory_order_release, memory_order_relaxed)); \
>> > +}
>>          \
>> > +
>>           \
>> > +static inline void type##_staticpool_freep(type **ptr)
>>           \
>> > +{
>>          \
>> > +    if (ptr) {
>>           \
>> > +        type##_staticpool_free(*ptr);
>>          \
>> > +        *ptr = NULL;
>>           \
>> > +    }
>>          \
>> > +}
>>          \
>> > +/* FF_STATICPOOL_DECLARE */
>> > +
>> > +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
>> > +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
>> > +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
>> > +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
>> > +
>> > +#endif
>>
>> I'm against this. Global mutable data is always bad. If you have 2
>> uses ffmpeg in the same process, their efficiency might get halved
>> because the available pool size is shared by both. The macro mess is
>> very ugly. Can we please not define pages of code as macros. Last but
>> not least this is the job of the memory manager, which already does
>> fancy things like per thread pools. I don't trust the atomics use
>> either, I'm don't want to have to debug that ever.
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
>
> I agree.
> Also the last GCC version has multithreaded malloc cache already
> implemented so it makes this patch only useful on non-glibc systems (or
> systems which don't implement malloc caching).
>

Not gcc, sorry, glibc.
Muhammad Faiz Jan. 20, 2018, 2:18 p.m. UTC | #8
On Sat, Jan 20, 2018 at 6:32 PM, Rostislav Pehlivanov
<atomnuker@gmail.com> wrote:
> On 20 January 2018 at 11:13, Rostislav Pehlivanov <atomnuker@gmail.com>
> wrote:
>
>>
>>
>> On 20 January 2018 at 10:22, wm4 <nfxjfg@googlemail.com> wrote:
>>
>>> On Sat, 20 Jan 2018 11:29:13 +0700
>>> Muhammad Faiz <mfcc64@gmail.com> wrote:
>>>
>>> > Help avoiding malloc-free cycles when allocating-freeing common
>>> > structures.
>>> >
>>> > Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>>> > ---
>>> >  libavutil/staticpool.h | 117 ++++++++++++++++++++++++++++++
>>> +++++++++++++++++++
>>> >  1 file changed, 117 insertions(+)
>>> >  create mode 100644 libavutil/staticpool.h
>>> >
>>> > diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
>>> > new file mode 100644
>>> > index 0000000000..9c9b2784bc
>>> > --- /dev/null
>>> > +++ b/libavutil/staticpool.h
>>> > @@ -0,0 +1,117 @@
>>> > +/*
>>> > + * This file is part of FFmpeg.
>>> > + *
>>> > + * FFmpeg is free software; you can redistribute it and/or
>>> > + * modify it under the terms of the GNU Lesser General Public
>>> > + * License as published by the Free Software Foundation; either
>>> > + * version 2.1 of the License, or (at your option) any later version.
>>> > + *
>>> > + * FFmpeg is distributed in the hope that it will be useful,
>>> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> > + * Lesser General Public License for more details.
>>> > + *
>>> > + * You should have received a copy of the GNU Lesser General Public
>>> > + * License along with FFmpeg; if not, write to the Free Software
>>> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>>> 02110-1301 USA
>>> > + */
>>> > +
>>> > +#ifndef AVUTIL_STATICPOOL_H
>>> > +#define AVUTIL_STATICPOOL_H
>>> > +
>>> > +#include <stdatomic.h>
>>> > +#include "avassert.h"
>>> > +#include "mem.h"
>>> > +
>>> > +/**
>>> > + * FF_STATICPOOL allocate memory without av_malloc if possible
>>> > + * @param size must be 2^n between 64 and 4096
>>> > + */
>>> > +#define FF_STATICPOOL_DECLARE(type, size)
>>>          \
>>> > +typedef struct type##_StaticPoolWrapper {
>>>          \
>>> > +    type        buf;
>>>           \
>>> > +    unsigned    index;
>>>           \
>>> > +    atomic_uint next;
>>>          \
>>> > +} type##_StaticPoolWrapper;
>>>          \
>>> > +
>>>           \
>>> > +static atomic_uint              type##_staticpool_next;
>>>          \
>>> > +static atomic_uint              type##_staticpool_last;
>>>          \
>>> > +static type##_StaticPoolWrapper type##_staticpool_table[size];
>>>           \
>>> > +
>>>           \
>>> > +static type *type##_staticpool_malloc(void)
>>>            \
>>> > +{
>>>          \
>>> > +    unsigned val, index, serial, new_val;
>>>          \
>>> > +
>>>           \
>>> > +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) -
>>> 1)));     \
>>> > +
>>>           \
>>> > +    /* use serial, avoid spinlock */
>>>           \
>>> > +    /* acquire, so we don't get stalled table[index].next */
>>>           \
>>> > +    val = atomic_load_explicit(&type##_staticpool_next,
>>> memory_order_acquire);  \
>>> > +    do {
>>>           \
>>> > +        index  = val & ((size) - 1);
>>>           \
>>> > +        serial = val & ~((size) - 1);
>>>          \
>>> > +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next,
>>> memory_order_relaxed) | (serial + (size)); \
>>> > +    } while (!atomic_compare_exchange_stro
>>> ng_explicit(&type##_staticpool_next, &val, new_val, \
>>> > +
>>> memory_order_acquire, memory_order_acquire)); \
>>> > +
>>>           \
>>> > +    index = val & ((size) - 1);
>>>          \
>>> > +    if (index)
>>>           \
>>> > +        return &type##_staticpool_table[index].buf;
>>>            \
>>> > +
>>>           \
>>> > +    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1,
>>> memory_order_relaxed) + 1; \
>>> > +    if (index < (size)) {
>>>          \
>>> > +        type##_staticpool_table[index].index = index;
>>>            \
>>> > +        return &type##_staticpool_table[index].buf;
>>>            \
>>> > +    }
>>>          \
>>> > +
>>>           \
>>> > +    atomic_fetch_add_explicit(&type##_staticpool_last, -1,
>>> memory_order_relaxed); \
>>> > +    return av_malloc(sizeof(type));
>>>          \
>>> > +}
>>>          \
>>> > +
>>>           \
>>> > +static inline type *type##_staticpool_mallocz(void)
>>>            \
>>> > +{
>>>          \
>>> > +    type *ptr = type##_staticpool_malloc();
>>>          \
>>> > +    if (ptr)
>>>           \
>>> > +        memset(ptr, 0, sizeof(*ptr));
>>>          \
>>> > +    return ptr;
>>>          \
>>> > +}
>>>          \
>>> > +
>>>           \
>>> > +static void type##_staticpool_free(type *ptr)
>>>          \
>>> > +{
>>>          \
>>> > +    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *)
>>> ptr;         \
>>> > +    unsigned val, serial, index, new_val;
>>>          \
>>> > +
>>>           \
>>> > +    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||
>>>            \
>>> > +        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table +
>>> size)) {        \
>>> > +        av_free(ptr);
>>>          \
>>> > +        return;
>>>          \
>>> > +    }
>>>          \
>>> > +
>>>           \
>>> > +    if (CONFIG_MEMORY_POISONING)
>>>           \
>>> > +        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));
>>>           \
>>> > +
>>>           \
>>> > +    val = atomic_load_explicit(&type##_staticpool_next,
>>> memory_order_relaxed);  \
>>> > +    do {
>>>           \
>>> > +        index  = val & ((size) - 1);
>>>           \
>>> > +        serial = val & ~((size) - 1);
>>>          \
>>> > +        atomic_store_explicit(&entry->next, index,
>>> memory_order_relaxed);       \
>>> > +        new_val = entry->index | (serial + (size));
>>>          \
>>> > +    } while (!atomic_compare_exchange_stro
>>> ng_explicit(&type##_staticpool_next, &val, new_val, \
>>> > +
>>> memory_order_release, memory_order_relaxed)); \
>>> > +}
>>>          \
>>> > +
>>>           \
>>> > +static inline void type##_staticpool_freep(type **ptr)
>>>           \
>>> > +{
>>>          \
>>> > +    if (ptr) {
>>>           \
>>> > +        type##_staticpool_free(*ptr);
>>>          \
>>> > +        *ptr = NULL;
>>>           \
>>> > +    }
>>>          \
>>> > +}
>>>          \
>>> > +/* FF_STATICPOOL_DECLARE */
>>> > +
>>> > +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
>>> > +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
>>> > +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
>>> > +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
>>> > +
>>> > +#endif
>>>
>>> I'm against this. Global mutable data is always bad. If you have 2
>>> uses ffmpeg in the same process, their efficiency might get halved
>>> because the available pool size is shared by both. The macro mess is
>>> very ugly. Can we please not define pages of code as macros. Last but
>>> not least this is the job of the memory manager, which already does
>>> fancy things like per thread pools. I don't trust the atomics use
>>> either, I'm don't want to have to debug that ever.
>>>
>>> _______________________________________________
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel@ffmpeg.org
>>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>
>>
>> I agree.
>> Also the last GCC version has multithreaded malloc cache already
>> implemented so it makes this patch only useful on non-glibc systems (or
>> systems which don't implement malloc caching).
>>
>
> Not gcc, sorry, glibc.

Interesting. Can you provide a benchmark for new version of glibc? My
glibc is outdated.

Thank's.
Muhammad Faiz Jan. 21, 2018, 3:03 a.m. UTC | #9
On Sat, Jan 20, 2018 at 5:18 PM, wm4 <nfxjfg@googlemail.com> wrote:
> On Sat, 20 Jan 2018 12:52:46 +0700
> Muhammad Faiz <mfcc64@gmail.com> wrote:
>
>> On Sat, Jan 20, 2018 at 11:49 AM, James Almer <jamrial@gmail.com> wrote:
>> > On 1/20/2018 1:29 AM, Muhammad Faiz wrote:
>> >> Help avoiding malloc-free cycles when allocating-freeing common
>> >> structures.
>> >>
>> >> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>> >> ---
>> >>  libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
>> >>  1 file changed, 117 insertions(+)
>> >>  create mode 100644 libavutil/staticpool.h
>> >>
>> >> diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
>> >> new file mode 100644
>> >> index 0000000000..9c9b2784bc
>> >> --- /dev/null
>> >> +++ b/libavutil/staticpool.h
>> >> @@ -0,0 +1,117 @@
>> >> +/*
>> >> + * This file is part of FFmpeg.
>> >> + *
>> >> + * FFmpeg is free software; you can redistribute it and/or
>> >> + * modify it under the terms of the GNU Lesser General Public
>> >> + * License as published by the Free Software Foundation; either
>> >> + * version 2.1 of the License, or (at your option) any later version.
>> >> + *
>> >> + * FFmpeg is distributed in the hope that it will be useful,
>> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> >> + * Lesser General Public License for more details.
>> >> + *
>> >> + * You should have received a copy of the GNU Lesser General Public
>> >> + * License along with FFmpeg; if not, write to the Free Software
>> >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> >> + */
>> >> +
>> >> +#ifndef AVUTIL_STATICPOOL_H
>> >> +#define AVUTIL_STATICPOOL_H
>> >> +
>> >> +#include <stdatomic.h>
>> >> +#include "avassert.h"
>> >> +#include "mem.h"
>> >> +
>> >> +/**
>> >> + * FF_STATICPOOL allocate memory without av_malloc if possible
>> >> + * @param size must be 2^n between 64 and 4096
>> >> + */
>> >> +#define FF_STATICPOOL_DECLARE(type, size)                                       \
>> >> +typedef struct type##_StaticPoolWrapper {                                       \
>> >> +    type        buf;                                                            \
>> >> +    unsigned    index;                                                          \
>> >> +    atomic_uint next;                                                           \
>> >> +} type##_StaticPoolWrapper;                                                     \
>> >> +                                                                                \
>> >> +static atomic_uint              type##_staticpool_next;                         \
>> >> +static atomic_uint              type##_staticpool_last;                         \
>> >> +static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
>> >> +                                                                                \
>> >> +static type *type##_staticpool_malloc(void)                                     \
>> >> +{                                                                               \
>> >> +    unsigned val, index, serial, new_val;                                       \
>> >> +                                                                                \
>> >> +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
>> >> +                                                                                \
>> >> +    /* use serial, avoid spinlock */                                            \
>> >> +    /* acquire, so we don't get stalled table[index].next */                    \
>> >> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
>> >> +    do {                                                                        \
>> >> +        index  = val & ((size) - 1);                                            \
>> >> +        serial = val & ~((size) - 1);                                           \
>> >> +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
>> >> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
>> >
>> > The wrappers for atomic_compare_exchange_* in the compat folder are not
>> > really working and fixing them is supposedly not trivial, so this will
>> > only work with GCC and Clang but not with for example MSVC or SunCC.
>>
>> What's the problem? I only see that stdatomic compat make typedef
>> every atomic type to intptr_t, so atomic_*64_t won't work if
>> sizeof(intptr_t) == 32.
>> Here I use atomic_uint, so I guess it will work.
>>
>> Note that if atomic_compare_exchange_* is broken then atomic_fetch_*
>> will also be broken because atomic_fetch_* call
>> atomic_compare_exchange_* on suncc compat.
>>
>> >
>> > Can you implement this using mutexes instead, or otherwise avoid using
>> > atomic_compare_exchange_*? You can even use static mutex initialization
>> > now for all targets, and not just native pthreads.
>>
>> Using mutex makes implementation slower.
>> Using atomic_exchange requires spin lock.
>
> Uncontended mutexes are spinlocks.

Of course.
But this code isn't spinlock, so probably the performance won't suffer
so much when contended.
Muhammad Faiz Jan. 21, 2018, 3:24 a.m. UTC | #10
On Sat, Jan 20, 2018 at 5:22 PM, wm4 <nfxjfg@googlemail.com> wrote:
> On Sat, 20 Jan 2018 11:29:13 +0700
> Muhammad Faiz <mfcc64@gmail.com> wrote:
>
>> Help avoiding malloc-free cycles when allocating-freeing common
>> structures.
>>
>> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>> ---
>>  libavutil/staticpool.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 117 insertions(+)
>>  create mode 100644 libavutil/staticpool.h
>>
>> diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
>> new file mode 100644
>> index 0000000000..9c9b2784bc
>> --- /dev/null
>> +++ b/libavutil/staticpool.h
>> @@ -0,0 +1,117 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVUTIL_STATICPOOL_H
>> +#define AVUTIL_STATICPOOL_H
>> +
>> +#include <stdatomic.h>
>> +#include "avassert.h"
>> +#include "mem.h"
>> +
>> +/**
>> + * FF_STATICPOOL allocate memory without av_malloc if possible
>> + * @param size must be 2^n between 64 and 4096
>> + */
>> +#define FF_STATICPOOL_DECLARE(type, size)                                       \
>> +typedef struct type##_StaticPoolWrapper {                                       \
>> +    type        buf;                                                            \
>> +    unsigned    index;                                                          \
>> +    atomic_uint next;                                                           \
>> +} type##_StaticPoolWrapper;                                                     \
>> +                                                                                \
>> +static atomic_uint              type##_staticpool_next;                         \
>> +static atomic_uint              type##_staticpool_last;                         \
>> +static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
>> +                                                                                \
>> +static type *type##_staticpool_malloc(void)                                     \
>> +{                                                                               \
>> +    unsigned val, index, serial, new_val;                                       \
>> +                                                                                \
>> +    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
>> +                                                                                \
>> +    /* use serial, avoid spinlock */                                            \
>> +    /* acquire, so we don't get stalled table[index].next */                    \
>> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
>> +    do {                                                                        \
>> +        index  = val & ((size) - 1);                                            \
>> +        serial = val & ~((size) - 1);                                           \
>> +        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
>> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
>> +                                                      memory_order_acquire, memory_order_acquire)); \
>> +                                                                                \
>> +    index = val & ((size) - 1);                                                 \
>> +    if (index)                                                                  \
>> +        return &type##_staticpool_table[index].buf;                             \
>> +                                                                                \
>> +    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1, memory_order_relaxed) + 1; \
>> +    if (index < (size)) {                                                       \
>> +        type##_staticpool_table[index].index = index;                           \
>> +        return &type##_staticpool_table[index].buf;                             \
>> +    }                                                                           \
>> +                                                                                \
>> +    atomic_fetch_add_explicit(&type##_staticpool_last, -1, memory_order_relaxed); \
>> +    return av_malloc(sizeof(type));                                             \
>> +}                                                                               \
>> +                                                                                \
>> +static inline type *type##_staticpool_mallocz(void)                             \
>> +{                                                                               \
>> +    type *ptr = type##_staticpool_malloc();                                     \
>> +    if (ptr)                                                                    \
>> +        memset(ptr, 0, sizeof(*ptr));                                           \
>> +    return ptr;                                                                 \
>> +}                                                                               \
>> +                                                                                \
>> +static void type##_staticpool_free(type *ptr)                                   \
>> +{                                                                               \
>> +    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *) ptr;         \
>> +    unsigned val, serial, index, new_val;                                       \
>> +                                                                                \
>> +    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||               \
>> +        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table + size)) {        \
>> +        av_free(ptr);                                                           \
>> +        return;                                                                 \
>> +    }                                                                           \
>> +                                                                                \
>> +    if (CONFIG_MEMORY_POISONING)                                                \
>> +        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));              \
>> +                                                                                \
>> +    val = atomic_load_explicit(&type##_staticpool_next, memory_order_relaxed);  \
>> +    do {                                                                        \
>> +        index  = val & ((size) - 1);                                            \
>> +        serial = val & ~((size) - 1);                                           \
>> +        atomic_store_explicit(&entry->next, index, memory_order_relaxed);       \
>> +        new_val = entry->index | (serial + (size));                             \
>> +    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
>> +                                                      memory_order_release, memory_order_relaxed)); \
>> +}                                                                               \
>> +                                                                                \
>> +static inline void type##_staticpool_freep(type **ptr)                          \
>> +{                                                                               \
>> +    if (ptr) {                                                                  \
>> +        type##_staticpool_free(*ptr);                                           \
>> +        *ptr = NULL;                                                            \
>> +    }                                                                           \
>> +}                                                                               \
>> +/* FF_STATICPOOL_DECLARE */
>> +
>> +#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
>> +#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
>> +#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
>> +#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
>> +
>> +#endif
>
> I'm against this. Global mutable data is always bad. If you have 2
> uses ffmpeg in the same process, their efficiency might get halved
> because the available pool size is shared by both.

The performance will suffer if number of pool usage is exceeding
number of available pool. In that case, the performance will be equal
with av_malloc.
Just for example, assuming that one usage uses 100 pools, it will
require 10 usages to make the performance suffer.


>                                          The macro mess is
> very ugly. Can we please not define pages of code as macros.

I can write this on some part without macros. The macros mainly is for
hiding the static structs and variables.


>                                          Last but
> not least this is the job of the memory manager, which already does
> fancy things like per thread pools.

Do you mean that AVBufferPool is pointless?
I'm waiting anybody to give benchmark result between staticpool and
newer version of glibc that uses per thread pools. I will drop this
patch if staticpool is slower or on par with glibc.


>                                          I don't trust the atomics use
> either, I'm don't want to have to debug that ever.

Of course, using atomics is more complicated that using mutex (with
benefits that it will be faster when properly used).
But it is not a valid reason to avoid using atomic because it is more
complicated.

Thank's.
wm4 Jan. 21, 2018, 12:11 p.m. UTC | #11
On Sun, 21 Jan 2018 10:24:21 +0700
Muhammad Faiz <mfcc64@gmail.com> wrote:

> >                                          I don't trust the atomics use
> > either, I'm don't want to have to debug that ever.  
> 
> Of course, using atomics is more complicated that using mutex (with
> benefits that it will be faster when properly used).
> But it is not a valid reason to avoid using atomic because it is more
> complicated.

Sure, but it also means it should be really be confined to cases where
it _really_ helps with performance.

Where is this a bottleneck at all?

I also think that this really belongs into a malloc implementation
instead. You might also want to try "alternative" malloc
implementations like jemalloc.

Another bad point is that this will make interaction with memory
debuggers worse. (They can't track whether the memory is considered
allocated or free.)
Muhammad Faiz Jan. 22, 2018, 12:21 a.m. UTC | #12
On Sun, Jan 21, 2018 at 7:11 PM, wm4 <nfxjfg@googlemail.com> wrote:
> On Sun, 21 Jan 2018 10:24:21 +0700
> Muhammad Faiz <mfcc64@gmail.com> wrote:
>
>> >                                          I don't trust the atomics use
>> > either, I'm don't want to have to debug that ever.
>>
>> Of course, using atomics is more complicated that using mutex (with
>> benefits that it will be faster when properly used).
>> But it is not a valid reason to avoid using atomic because it is more
>> complicated.
>
> Sure, but it also means it should be really be confined to cases where
> it _really_ helps with performance.
>
> Where is this a bottleneck at all?

Performance difference is noticeable with audio-only stuff.
Because audio processing is typically fast, malloc-free cycle of
AVFrame, AVBuffer, etc becomes bottlenecks.


>
> I also think that this really belongs into a malloc implementation
> instead. You might also want to try "alternative" malloc
> implementations like jemalloc.

jemalloc nicely is fast. The performance is on par with staticpool and
even faster on high contending situation. I hope that new glibc-malloc
is also fast. So I drop the patch.

With this malloc performance, usage of AVBufferPool on audio frame
becomes questionable.

Thank's.
diff mbox

Patch

diff --git a/libavutil/staticpool.h b/libavutil/staticpool.h
new file mode 100644
index 0000000000..9c9b2784bc
--- /dev/null
+++ b/libavutil/staticpool.h
@@ -0,0 +1,117 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_STATICPOOL_H
+#define AVUTIL_STATICPOOL_H
+
+#include <stdatomic.h>
+#include "avassert.h"
+#include "mem.h"
+
+/**
+ * FF_STATICPOOL allocate memory without av_malloc if possible
+ * @param size must be 2^n between 64 and 4096
+ */
+#define FF_STATICPOOL_DECLARE(type, size)                                       \
+typedef struct type##_StaticPoolWrapper {                                       \
+    type        buf;                                                            \
+    unsigned    index;                                                          \
+    atomic_uint next;                                                           \
+} type##_StaticPoolWrapper;                                                     \
+                                                                                \
+static atomic_uint              type##_staticpool_next;                         \
+static atomic_uint              type##_staticpool_last;                         \
+static type##_StaticPoolWrapper type##_staticpool_table[size];                  \
+                                                                                \
+static type *type##_staticpool_malloc(void)                                     \
+{                                                                               \
+    unsigned val, index, serial, new_val;                                       \
+                                                                                \
+    av_assert0((size) >= 64 && (size) <= 4096 && !((size) & ((size) - 1)));     \
+                                                                                \
+    /* use serial, avoid spinlock */                                            \
+    /* acquire, so we don't get stalled table[index].next */                    \
+    val = atomic_load_explicit(&type##_staticpool_next, memory_order_acquire);  \
+    do {                                                                        \
+        index  = val & ((size) - 1);                                            \
+        serial = val & ~((size) - 1);                                           \
+        new_val = atomic_load_explicit(&type##_staticpool_table[index].next, memory_order_relaxed) | (serial + (size)); \
+    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
+                                                      memory_order_acquire, memory_order_acquire)); \
+                                                                                \
+    index = val & ((size) - 1);                                                 \
+    if (index)                                                                  \
+        return &type##_staticpool_table[index].buf;                             \
+                                                                                \
+    index = atomic_fetch_add_explicit(&type##_staticpool_last, 1, memory_order_relaxed) + 1; \
+    if (index < (size)) {                                                       \
+        type##_staticpool_table[index].index = index;                           \
+        return &type##_staticpool_table[index].buf;                             \
+    }                                                                           \
+                                                                                \
+    atomic_fetch_add_explicit(&type##_staticpool_last, -1, memory_order_relaxed); \
+    return av_malloc(sizeof(type));                                             \
+}                                                                               \
+                                                                                \
+static inline type *type##_staticpool_mallocz(void)                             \
+{                                                                               \
+    type *ptr = type##_staticpool_malloc();                                     \
+    if (ptr)                                                                    \
+        memset(ptr, 0, sizeof(*ptr));                                           \
+    return ptr;                                                                 \
+}                                                                               \
+                                                                                \
+static void type##_staticpool_free(type *ptr)                                   \
+{                                                                               \
+    type##_StaticPoolWrapper *entry = (type##_StaticPoolWrapper *) ptr;         \
+    unsigned val, serial, index, new_val;                                       \
+                                                                                \
+    if ((uintptr_t)ptr <= (uintptr_t)(type##_staticpool_table) ||               \
+        (uintptr_t)ptr >= (uintptr_t)(type##_staticpool_table + size)) {        \
+        av_free(ptr);                                                           \
+        return;                                                                 \
+    }                                                                           \
+                                                                                \
+    if (CONFIG_MEMORY_POISONING)                                                \
+        memset(&entry->buf, FF_MEMORY_POISON, sizeof(entry->buf));              \
+                                                                                \
+    val = atomic_load_explicit(&type##_staticpool_next, memory_order_relaxed);  \
+    do {                                                                        \
+        index  = val & ((size) - 1);                                            \
+        serial = val & ~((size) - 1);                                           \
+        atomic_store_explicit(&entry->next, index, memory_order_relaxed);       \
+        new_val = entry->index | (serial + (size));                             \
+    } while (!atomic_compare_exchange_strong_explicit(&type##_staticpool_next, &val, new_val, \
+                                                      memory_order_release, memory_order_relaxed)); \
+}                                                                               \
+                                                                                \
+static inline void type##_staticpool_freep(type **ptr)                          \
+{                                                                               \
+    if (ptr) {                                                                  \
+        type##_staticpool_free(*ptr);                                           \
+        *ptr = NULL;                                                            \
+    }                                                                           \
+}                                                                               \
+/* FF_STATICPOOL_DECLARE */
+
+#define FF_STATICPOOL_MALLOC(type) type##_staticpool_malloc()
+#define FF_STATICPOOL_MALLOCZ(type) type##_staticpool_mallocz()
+#define FF_STATICPOOL_FREE(type, v) type##_staticpool_free(v)
+#define FF_STATICPOOL_FREEP(type, v) type##_staticpool_freep(v)
+
+#endif