diff mbox

[FFmpeg-devel] avutil/crc: use ff_thread_once at av_crc_get_table

Message ID 20171024093126.21715-1-mfcc64@gmail.com
State Accepted
Commit 8329ae781a75c1b665fc2ffe2e08be2c8207419e
Headers show

Commit Message

Muhammad Faiz Oct. 24, 2017, 9:31 a.m. UTC
Fix tsan warnings.

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
---
 libavutil/crc.c | 49 +++++++++++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 20 deletions(-)

Comments

Muhammad Faiz Oct. 30, 2017, 7:14 a.m. UTC | #1
On Tue, Oct 24, 2017 at 4:31 PM, Muhammad Faiz <mfcc64@gmail.com> wrote:
> Fix tsan warnings.
>
> Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
> ---
>  libavutil/crc.c | 49 +++++++++++++++++++++++++++++--------------------
>  1 file changed, 29 insertions(+), 20 deletions(-)

Ping.

My recent benchmark:

old/new:
 296320/ 340400 decicycles in AV_CRC_8_ATM,       1 runs,      0 skips
   1180/   2040 decicycles in AV_CRC_8_ATM,       1 runs,      0 skips
    830/   1460 decicycles in AV_CRC_8_ATM,       2 runs,      0 skips
    620/    995 decicycles in AV_CRC_8_ATM,       4 runs,      0 skips
    505/    797 decicycles in AV_CRC_8_ATM,       8 runs,      0 skips
    487/    653 decicycles in AV_CRC_8_ATM,      16 runs,      0 skips
    448/    546 decicycles in AV_CRC_8_ATM,      32 runs,      0 skips
    435/    531 decicycles in AV_CRC_8_ATM,      64 runs,      0 skips
    445/    479 decicycles in AV_CRC_8_ATM,     128 runs,      0 skips
    454/    451 decicycles in AV_CRC_8_ATM,     256 runs,      0 skips
    469/    436 decicycles in AV_CRC_8_ATM,     512 runs,      0 skips
    459/    429 decicycles in AV_CRC_8_ATM,    1024 runs,      0 skips
    467/    425 decicycles in AV_CRC_8_ATM,    2048 runs,      0 skips

 137180/ 118440 decicycles in AV_CRC_16_ANSI,       1 runs,      0 skips
    720/   1140 decicycles in AV_CRC_16_ANSI,       1 runs,      0 skips
    550/    880 decicycles in AV_CRC_16_ANSI,       2 runs,      0 skips
    515/    700 decicycles in AV_CRC_16_ANSI,       4 runs,      0 skips
    550/    560 decicycles in AV_CRC_16_ANSI,       8 runs,      0 skips
    508/    515 decicycles in AV_CRC_16_ANSI,      16 runs,      0 skips
    493/    520 decicycles in AV_CRC_16_ANSI,      32 runs,      0 skips
    482/    515 decicycles in AV_CRC_16_ANSI,      64 runs,      0 skips
    482/    468 decicycles in AV_CRC_16_ANSI,     128 runs,      0 skips
    481/    443 decicycles in AV_CRC_16_ANSI,     256 runs,      0 skips
    467/    429 decicycles in AV_CRC_16_ANSI,     512 runs,      0 skips
    474/    422 decicycles in AV_CRC_16_ANSI,    1024 runs,      0 skips
    469/    425 decicycles in AV_CRC_16_ANSI,    2048 runs,      0 skips

 124720/ 104060 decicycles in AV_CRC_16_CCITT,       1 runs,      0 skips
    460/    860 decicycles in AV_CRC_16_CCITT,       1 runs,      0 skips
    440/    740 decicycles in AV_CRC_16_CCITT,       2 runs,      0 skips
    505/    660 decicycles in AV_CRC_16_CCITT,       4 runs,      0 skips
    472/    605 decicycles in AV_CRC_16_CCITT,       8 runs,      0 skips
    486/    561 decicycles in AV_CRC_16_CCITT,      16 runs,      0 skips
    466/    557 decicycles in AV_CRC_16_CCITT,      32 runs,      0 skips
    489/    546 decicycles in AV_CRC_16_CCITT,      64 runs,      0 skips
    496/    552 decicycles in AV_CRC_16_CCITT,     128 runs,      0 skips
    459/    522 decicycles in AV_CRC_16_CCITT,     256 runs,      0 skips
    447/    493 decicycles in AV_CRC_16_CCITT,     512 runs,      0 skips
    468/    469 decicycles in AV_CRC_16_CCITT,    1024 runs,      0 skips
    477/    454 decicycles in AV_CRC_16_CCITT,    2048 runs,      0 skips

 123500/ 102600 decicycles in AV_CRC_32_IEEE,       1 runs,      0 skips
    460/   1060 decicycles in AV_CRC_32_IEEE,       1 runs,      0 skips
    420/    850 decicycles in AV_CRC_32_IEEE,       2 runs,      0 skips
    415/    685 decicycles in AV_CRC_32_IEEE,       4 runs,      0 skips
    407/    557 decicycles in AV_CRC_32_IEEE,       8 runs,      0 skips
    402/    540 decicycles in AV_CRC_32_IEEE,      16 runs,      0 skips
    428/    491 decicycles in AV_CRC_32_IEEE,      32 runs,      0 skips
    452/    501 decicycles in AV_CRC_32_IEEE,      64 runs,      0 skips
    466/    507 decicycles in AV_CRC_32_IEEE,     128 runs,      0 skips
    474/    506 decicycles in AV_CRC_32_IEEE,     256 runs,      0 skips
    478/    482 decicycles in AV_CRC_32_IEEE,     512 runs,      0 skips
    476/    454 decicycles in AV_CRC_32_IEEE,    1024 runs,      0 skips
    472/    435 decicycles in AV_CRC_32_IEEE,    2048 runs,      0 skips

 133800/ 110660 decicycles in AV_CRC_32_IEEE_LE,       1 runs,      0 skips
    460/   1060 decicycles in AV_CRC_32_IEEE_LE,       1 runs,      0 skips
    560/    820 decicycles in AV_CRC_32_IEEE_LE,       2 runs,      0 skips
    530/    665 decicycles in AV_CRC_32_IEEE_LE,       4 runs,      0 skips
    502/    547 decicycles in AV_CRC_32_IEEE_LE,       8 runs,      0 skips
    498/    533 decicycles in AV_CRC_32_IEEE_LE,      16 runs,      0 skips
    481/    533 decicycles in AV_CRC_32_IEEE_LE,      32 runs,      0 skips
    493/    542 decicycles in AV_CRC_32_IEEE_LE,      64 runs,      0 skips
    494/    489 decicycles in AV_CRC_32_IEEE_LE,     128 runs,      0 skips
    500/    461 decicycles in AV_CRC_32_IEEE_LE,     256 runs,      0 skips
    493/    446 decicycles in AV_CRC_32_IEEE_LE,     512 runs,      0 skips
    498/    439 decicycles in AV_CRC_32_IEEE_LE,    1024 runs,      0 skips
    482/    436 decicycles in AV_CRC_32_IEEE_LE,    2048 runs,      0 skips

 131300/ 105420 decicycles in AV_CRC_16_ANSI_LE,       1 runs,      0 skips
    500/    840 decicycles in AV_CRC_16_ANSI_LE,       1 runs,      0 skips
    460/    740 decicycles in AV_CRC_16_ANSI_LE,       2 runs,      0 skips
    430/    635 decicycles in AV_CRC_16_ANSI_LE,       4 runs,      0 skips
    467/    547 decicycles in AV_CRC_16_ANSI_LE,       8 runs,      0 skips
    465/    506 decicycles in AV_CRC_16_ANSI_LE,      16 runs,      0 skips
    448/    475 decicycles in AV_CRC_16_ANSI_LE,      32 runs,      0 skips
    466/    445 decicycles in AV_CRC_16_ANSI_LE,      64 runs,      0 skips
    475/    477 decicycles in AV_CRC_16_ANSI_LE,     128 runs,      0 skips
    477/    481 decicycles in AV_CRC_16_ANSI_LE,     256 runs,      0 skips
    475/    446 decicycles in AV_CRC_16_ANSI_LE,     512 runs,      0 skips
    477/    429 decicycles in AV_CRC_16_ANSI_LE,    1024 runs,      0 skips
    471/    420 decicycles in AV_CRC_16_ANSI_LE,    2048 runs,      0 skips

  59040/  65960 decicycles in AV_CRC_24_IEEE,       1 runs,      0 skips
    720/   1180 decicycles in AV_CRC_24_IEEE,       1 runs,      0 skips
    590/   1050 decicycles in AV_CRC_24_IEEE,       2 runs,      0 skips
    515/    780 decicycles in AV_CRC_24_IEEE,       4 runs,      0 skips
    475/    602 decicycles in AV_CRC_24_IEEE,       8 runs,      0 skips
    461/    555 decicycles in AV_CRC_24_IEEE,      16 runs,      0 skips
    456/    543 decicycles in AV_CRC_24_IEEE,      32 runs,      0 skips
    460/    489 decicycles in AV_CRC_24_IEEE,      64 runs,      0 skips
    466/    521 decicycles in AV_CRC_24_IEEE,     128 runs,      0 skips
    486/    539 decicycles in AV_CRC_24_IEEE,     256 runs,      0 skips
    480/    499 decicycles in AV_CRC_24_IEEE,     512 runs,      0 skips
    493/    472 decicycles in AV_CRC_24_IEEE,    1024 runs,      0 skips
    492/    457 decicycles in AV_CRC_24_IEEE,    2048 runs,      0 skips

Thank's.
Michael Niedermayer Oct. 30, 2017, 1:07 p.m. UTC | #2
On Mon, Oct 30, 2017 at 02:14:35PM +0700, Muhammad Faiz wrote:
> On Tue, Oct 24, 2017 at 4:31 PM, Muhammad Faiz <mfcc64@gmail.com> wrote:
> > Fix tsan warnings.
> >
> > Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
> > ---
> >  libavutil/crc.c | 49 +++++++++++++++++++++++++++++--------------------
> >  1 file changed, 29 insertions(+), 20 deletions(-)
> 
> Ping.

I assume james patch is faster than both ?

If this code is never run in speed relevant loops then your solution is
better. Otherwise i think james patch is better

[...]
Muhammad Faiz Oct. 31, 2017, 5:37 p.m. UTC | #3
On Mon, Oct 30, 2017 at 8:07 PM, Michael Niedermayer
<michael@niedermayer.cc> wrote:
> On Mon, Oct 30, 2017 at 02:14:35PM +0700, Muhammad Faiz wrote:
>> On Tue, Oct 24, 2017 at 4:31 PM, Muhammad Faiz <mfcc64@gmail.com> wrote:
>> > Fix tsan warnings.
>> >
>> > Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>> > ---
>> >  libavutil/crc.c | 49 +++++++++++++++++++++++++++++--------------------
>> >  1 file changed, 29 insertions(+), 20 deletions(-)
>>
>> Ping.
>
> I assume james patch is faster than both ?
>
> If this code is never run in speed relevant loops then your solution is
> better. Otherwise i think james patch is better

I guess the common usage is to call av_crc() which is far more
computationally intensive after calling av_crc_get_table().
Muhammad Faiz Nov. 12, 2017, 1:28 p.m. UTC | #4
On Wed, Nov 1, 2017 at 12:37 AM, Muhammad Faiz <mfcc64@gmail.com> wrote:
> On Mon, Oct 30, 2017 at 8:07 PM, Michael Niedermayer
> <michael@niedermayer.cc> wrote:
>> On Mon, Oct 30, 2017 at 02:14:35PM +0700, Muhammad Faiz wrote:
>>> On Tue, Oct 24, 2017 at 4:31 PM, Muhammad Faiz <mfcc64@gmail.com> wrote:
>>> > Fix tsan warnings.
>>> >
>>> > Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>>> > ---
>>> >  libavutil/crc.c | 49 +++++++++++++++++++++++++++++--------------------
>>> >  1 file changed, 29 insertions(+), 20 deletions(-)
>>>
>>> Ping.
>>
>> I assume james patch is faster than both ?
>>
>> If this code is never run in speed relevant loops then your solution is
>> better. Otherwise i think james patch is better
>
> I guess the common usage is to call av_crc() which is far more
> computationally intensive after calling av_crc_get_table().

Ping.
I'm going to apply this tomorrow.

Thank's.
Muhammad Faiz Nov. 13, 2017, 11:26 p.m. UTC | #5
On Sun, Nov 12, 2017 at 8:28 PM, Muhammad Faiz <mfcc64@gmail.com> wrote:
> On Wed, Nov 1, 2017 at 12:37 AM, Muhammad Faiz <mfcc64@gmail.com> wrote:
>> On Mon, Oct 30, 2017 at 8:07 PM, Michael Niedermayer
>> <michael@niedermayer.cc> wrote:
>>> On Mon, Oct 30, 2017 at 02:14:35PM +0700, Muhammad Faiz wrote:
>>>> On Tue, Oct 24, 2017 at 4:31 PM, Muhammad Faiz <mfcc64@gmail.com> wrote:
>>>> > Fix tsan warnings.
>>>> >
>>>> > Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
>>>> > ---
>>>> >  libavutil/crc.c | 49 +++++++++++++++++++++++++++++--------------------
>>>> >  1 file changed, 29 insertions(+), 20 deletions(-)
>>>>
>>>> Ping.
>>>
>>> I assume james patch is faster than both ?
>>>
>>> If this code is never run in speed relevant loops then your solution is
>>> better. Otherwise i think james patch is better
>>
>> I guess the common usage is to call av_crc() which is far more
>> computationally intensive after calling av_crc_get_table().
>
> Ping.
> I'm going to apply this tomorrow.
>
> Thank's.

Applied.

Thank's.
diff mbox

Patch

diff --git a/libavutil/crc.c b/libavutil/crc.c
index 495732b163..d44550c9c0 100644
--- a/libavutil/crc.c
+++ b/libavutil/crc.c
@@ -20,6 +20,8 @@ 
 
 #include "config.h"
 
+#include "thread.h"
+#include "avassert.h"
 #include "bswap.h"
 #include "common.h"
 #include "crc.h"
@@ -291,20 +293,24 @@  static const AVCRC av_crc_table[AV_CRC_MAX][257] = {
 #else
 #define CRC_TABLE_SIZE 1024
 #endif
-static struct {
-    uint8_t  le;
-    uint8_t  bits;
-    uint32_t poly;
-} av_crc_table_params[AV_CRC_MAX] = {
-    [AV_CRC_8_ATM]      = { 0,  8,       0x07 },
-    [AV_CRC_16_ANSI]    = { 0, 16,     0x8005 },
-    [AV_CRC_16_CCITT]   = { 0, 16,     0x1021 },
-    [AV_CRC_24_IEEE]    = { 0, 24,   0x864CFB },
-    [AV_CRC_32_IEEE]    = { 0, 32, 0x04C11DB7 },
-    [AV_CRC_32_IEEE_LE] = { 1, 32, 0xEDB88320 },
-    [AV_CRC_16_ANSI_LE] = { 1, 16,     0xA001 },
-};
 static AVCRC av_crc_table[AV_CRC_MAX][CRC_TABLE_SIZE];
+
+#define DECLARE_CRC_INIT_TABLE_ONCE(id, le, bits, poly)                                       \
+static AVOnce id ## _once_control = AV_ONCE_INIT;                                             \
+static void id ## _init_table_once(void)                                                      \
+{                                                                                             \
+    av_assert0(av_crc_init(av_crc_table[id], le, bits, poly, sizeof(av_crc_table[id])) >= 0); \
+}
+
+#define CRC_INIT_TABLE_ONCE(id) ff_thread_once(&id ## _once_control, id ## _init_table_once)
+
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_8_ATM,      0,  8,       0x07)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI,    0, 16,     0x8005)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_16_CCITT,   0, 16,     0x1021)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_24_IEEE,    0, 24,   0x864CFB)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE,    0, 32, 0x04C11DB7)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE_LE, 1, 32, 0xEDB88320)
+DECLARE_CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI_LE, 1, 16,     0xA001)
 #endif
 
 int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size)
@@ -343,13 +349,16 @@  int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size)
 const AVCRC *av_crc_get_table(AVCRCId crc_id)
 {
 #if !CONFIG_HARDCODED_TABLES
-    if (!av_crc_table[crc_id][FF_ARRAY_ELEMS(av_crc_table[crc_id]) - 1])
-        if (av_crc_init(av_crc_table[crc_id],
-                        av_crc_table_params[crc_id].le,
-                        av_crc_table_params[crc_id].bits,
-                        av_crc_table_params[crc_id].poly,
-                        sizeof(av_crc_table[crc_id])) < 0)
-            return NULL;
+    switch (crc_id) {
+    case AV_CRC_8_ATM:      CRC_INIT_TABLE_ONCE(AV_CRC_8_ATM); break;
+    case AV_CRC_16_ANSI:    CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI); break;
+    case AV_CRC_16_CCITT:   CRC_INIT_TABLE_ONCE(AV_CRC_16_CCITT); break;
+    case AV_CRC_24_IEEE:    CRC_INIT_TABLE_ONCE(AV_CRC_24_IEEE); break;
+    case AV_CRC_32_IEEE:    CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE); break;
+    case AV_CRC_32_IEEE_LE: CRC_INIT_TABLE_ONCE(AV_CRC_32_IEEE_LE); break;
+    case AV_CRC_16_ANSI_LE: CRC_INIT_TABLE_ONCE(AV_CRC_16_ANSI_LE); break;
+    default: av_assert0(0);
+    }
 #endif
     return av_crc_table[crc_id];
 }