diff mbox series

[FFmpeg-devel] riscv: vc1dsp: Don't check vlenb before checking the CPU flags

Message ID 20231215130204.127607-1-martin@martin.st
State Superseded
Headers show
Series [FFmpeg-devel] riscv: vc1dsp: Don't check vlenb before checking the CPU flags | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martin Storsjö Dec. 15, 2023, 1:02 p.m. UTC
We can't call ff_get_rv_vlenb() if we don't have RVV available
at all.

Due to the SIGILL signal handler in checkasm catching it, in an
unexpected place, this caused checkasm to hang instead of reporting
the issue.
---
 libavcodec/riscv/vc1dsp_init.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

Comments

Rémi Denis-Courmont Dec. 15, 2023, 2:54 p.m. UTC | #1
Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit :
>We can't call ff_get_rv_vlenb() if we don't have RVV available
>at all.
>
>Due to the SIGILL signal handler in checkasm catching it, in an
>unexpected place, this caused checkasm to hang instead of reporting
>the issue.
>---
> libavcodec/riscv/vc1dsp_init.c | 16 +++++++---------
> 1 file changed, 7 insertions(+), 9 deletions(-)
>
>diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c
>index 0d22d28f4d..2bb7e7fe8f 100644
>--- a/libavcodec/riscv/vc1dsp_init.c
>+++ b/libavcodec/riscv/vc1dsp_init.c
>@@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
> #if HAVE_RVV
>     int flags = av_get_cpu_flags();
> 
>-    if (ff_get_rv_vlenb() >= 16) {
>-        if (flags & AV_CPU_FLAG_RVV_I64) {
>-            dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>-            dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>-        }
>-        if (flags & AV_CPU_FLAG_RVV_I32) {
>-            dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>-            dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
>-        }
>+    if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
>+        dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>+        dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>+    }
>+    if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
>+        dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>+        dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;

I64 implies I32 so it is not necessary to check vlenb twice. That's what I was going for originally in my then review comments but then woopsie.

>     }
> #endif
> }
Martin Storsjö Dec. 15, 2023, 3:39 p.m. UTC | #2
On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote:

> Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit :
>> We can't call ff_get_rv_vlenb() if we don't have RVV available
>> at all.
>>
>> Due to the SIGILL signal handler in checkasm catching it, in an
>> unexpected place, this caused checkasm to hang instead of reporting
>> the issue.
>> ---
>> libavcodec/riscv/vc1dsp_init.c | 16 +++++++---------
>> 1 file changed, 7 insertions(+), 9 deletions(-)
>>
>> diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c
>> index 0d22d28f4d..2bb7e7fe8f 100644
>> --- a/libavcodec/riscv/vc1dsp_init.c
>> +++ b/libavcodec/riscv/vc1dsp_init.c
>> @@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
>> #if HAVE_RVV
>>     int flags = av_get_cpu_flags();
>> 
>> -    if (ff_get_rv_vlenb() >= 16) {
>> -        if (flags & AV_CPU_FLAG_RVV_I64) {
>> -            dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>> -            dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>> -        }
>> -        if (flags & AV_CPU_FLAG_RVV_I32) {
>> -            dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>> -            dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
>> -        }
>> +    if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
>> +        dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>> +        dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>> +    }
>> +    if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
>> +        dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>> +        dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
>
> I64 implies I32 so it is not necessary to check vlenb twice. That's what 
> I was going for originally in my then review comments but then woopsie.

Sure, fixed.

FWIW I see that vc1_inv_trans_8x4_dc_rvv_i64 seems to fail the checkasm 
test most of the time as well.

// Martin
Rémi Denis-Courmont Dec. 15, 2023, 7:40 p.m. UTC | #3
Le 15 décembre 2023 17:39:48 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit :
>On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote:
>
>> Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit :
>>> We can't call ff_get_rv_vlenb() if we don't have RVV available
>>> at all.
>>> 
>>> Due to the SIGILL signal handler in checkasm catching it, in an
>>> unexpected place, this caused checkasm to hang instead of reporting
>>> the issue.
>>> ---
>>> libavcodec/riscv/vc1dsp_init.c | 16 +++++++---------
>>> 1 file changed, 7 insertions(+), 9 deletions(-)
>>> 
>>> diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c
>>> index 0d22d28f4d..2bb7e7fe8f 100644
>>> --- a/libavcodec/riscv/vc1dsp_init.c
>>> +++ b/libavcodec/riscv/vc1dsp_init.c
>>> @@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
>>> #if HAVE_RVV
>>>     int flags = av_get_cpu_flags();
>>> 
>>> -    if (ff_get_rv_vlenb() >= 16) {
>>> -        if (flags & AV_CPU_FLAG_RVV_I64) {
>>> -            dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>>> -            dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>>> -        }
>>> -        if (flags & AV_CPU_FLAG_RVV_I32) {
>>> -            dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>>> -            dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
>>> -        }
>>> +    if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
>>> +        dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>>> +        dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>>> +    }
>>> +    if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
>>> +        dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>>> +        dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
>> 
>> I64 implies I32 so it is not necessary to check vlenb twice. That's what I was going for originally in my then review comments but then woopsie.
>
>Sure, fixed.
>
>FWIW I see that vc1_inv_trans_8x4_dc_rvv_i64 seems to fail the checkasm test most of the time as well.

Hmm, I didn't write those optimisations but I thought I tested them before pushing. Is this subtly dependent on the vector length, maybe?  Currently only 128-bit hardware is commercially available but QEMU can also emulate 256, 512 and 1014.

>
>// Martin
>_______________________________________________
>ffmpeg-devel mailing list
>ffmpeg-devel@ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email
>ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Martin Storsjö Dec. 15, 2023, 7:57 p.m. UTC | #4
On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote:

> Le 15 décembre 2023 17:39:48 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit :
>> On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote:
>>
>>> Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit :
>>>> We can't call ff_get_rv_vlenb() if we don't have RVV available
>>>> at all.
>>>> 
>>>> Due to the SIGILL signal handler in checkasm catching it, in an
>>>> unexpected place, this caused checkasm to hang instead of reporting
>>>> the issue.
>>>> ---
>>>> libavcodec/riscv/vc1dsp_init.c | 16 +++++++---------
>>>> 1 file changed, 7 insertions(+), 9 deletions(-)
>>>> 
>>>> diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c
>>>> index 0d22d28f4d..2bb7e7fe8f 100644
>>>> --- a/libavcodec/riscv/vc1dsp_init.c
>>>> +++ b/libavcodec/riscv/vc1dsp_init.c
>>>> @@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
>>>> #if HAVE_RVV
>>>>     int flags = av_get_cpu_flags();
>>>> 
>>>> -    if (ff_get_rv_vlenb() >= 16) {
>>>> -        if (flags & AV_CPU_FLAG_RVV_I64) {
>>>> -            dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>>>> -            dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>>>> -        }
>>>> -        if (flags & AV_CPU_FLAG_RVV_I32) {
>>>> -            dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>>>> -            dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
>>>> -        }
>>>> +    if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
>>>> +        dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
>>>> +        dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
>>>> +    }
>>>> +    if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
>>>> +        dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
>>>> +        dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
>>> 
>>> I64 implies I32 so it is not necessary to check vlenb twice. That's what I was going for originally in my then review comments but then woopsie.
>>
>> Sure, fixed.
>>
>> FWIW I see that vc1_inv_trans_8x4_dc_rvv_i64 seems to fail the checkasm test most of the time as well.
>
> Hmm, I didn't write those optimisations but I thought I tested them 
> before pushing. Is this subtly dependent on the vector length, maybe? 
> Currently only 128-bit hardware is commercially available but QEMU can 
> also emulate 256, 512 and 1014.

Ah, yes, it succeeds with 128 bit vectors, but fails with 256 bit.

// Martin
diff mbox series

Patch

diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c
index 0d22d28f4d..2bb7e7fe8f 100644
--- a/libavcodec/riscv/vc1dsp_init.c
+++ b/libavcodec/riscv/vc1dsp_init.c
@@ -35,15 +35,13 @@  av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (ff_get_rv_vlenb() >= 16) {
-        if (flags & AV_CPU_FLAG_RVV_I64) {
-            dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
-            dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
-        }
-        if (flags & AV_CPU_FLAG_RVV_I32) {
-            dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
-            dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
-        }
+    if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
+        dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
+        dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
+    }
+    if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+        dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
+        dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
     }
 #endif
 }