Message ID | 20231215130204.127607-1-martin@martin.st |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel] riscv: vc1dsp: Don't check vlenb before checking the CPU flags | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit : >We can't call ff_get_rv_vlenb() if we don't have RVV available >at all. > >Due to the SIGILL signal handler in checkasm catching it, in an >unexpected place, this caused checkasm to hang instead of reporting >the issue. >--- > libavcodec/riscv/vc1dsp_init.c | 16 +++++++--------- > 1 file changed, 7 insertions(+), 9 deletions(-) > >diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c >index 0d22d28f4d..2bb7e7fe8f 100644 >--- a/libavcodec/riscv/vc1dsp_init.c >+++ b/libavcodec/riscv/vc1dsp_init.c >@@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp) > #if HAVE_RVV > int flags = av_get_cpu_flags(); > >- if (ff_get_rv_vlenb() >= 16) { >- if (flags & AV_CPU_FLAG_RVV_I64) { >- dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >- dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >- } >- if (flags & AV_CPU_FLAG_RVV_I32) { >- dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >- dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; >- } >+ if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) { >+ dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >+ dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >+ } >+ if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { >+ dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >+ dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; I64 implies I32 so it is not necessary to check vlenb twice. That's what I was going for originally in my then review comments but then woopsie. > } > #endif > }
On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote: > Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit : >> We can't call ff_get_rv_vlenb() if we don't have RVV available >> at all. >> >> Due to the SIGILL signal handler in checkasm catching it, in an >> unexpected place, this caused checkasm to hang instead of reporting >> the issue. >> --- >> libavcodec/riscv/vc1dsp_init.c | 16 +++++++--------- >> 1 file changed, 7 insertions(+), 9 deletions(-) >> >> diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c >> index 0d22d28f4d..2bb7e7fe8f 100644 >> --- a/libavcodec/riscv/vc1dsp_init.c >> +++ b/libavcodec/riscv/vc1dsp_init.c >> @@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp) >> #if HAVE_RVV >> int flags = av_get_cpu_flags(); >> >> - if (ff_get_rv_vlenb() >= 16) { >> - if (flags & AV_CPU_FLAG_RVV_I64) { >> - dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >> - dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >> - } >> - if (flags & AV_CPU_FLAG_RVV_I32) { >> - dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >> - dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; >> - } >> + if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) { >> + dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >> + dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >> + } >> + if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { >> + dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >> + dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; > > I64 implies I32 so it is not necessary to check vlenb twice. That's what > I was going for originally in my then review comments but then woopsie. Sure, fixed. FWIW I see that vc1_inv_trans_8x4_dc_rvv_i64 seems to fail the checkasm test most of the time as well. // Martin
Le 15 décembre 2023 17:39:48 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit : >On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote: > >> Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit : >>> We can't call ff_get_rv_vlenb() if we don't have RVV available >>> at all. >>> >>> Due to the SIGILL signal handler in checkasm catching it, in an >>> unexpected place, this caused checkasm to hang instead of reporting >>> the issue. >>> --- >>> libavcodec/riscv/vc1dsp_init.c | 16 +++++++--------- >>> 1 file changed, 7 insertions(+), 9 deletions(-) >>> >>> diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c >>> index 0d22d28f4d..2bb7e7fe8f 100644 >>> --- a/libavcodec/riscv/vc1dsp_init.c >>> +++ b/libavcodec/riscv/vc1dsp_init.c >>> @@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp) >>> #if HAVE_RVV >>> int flags = av_get_cpu_flags(); >>> >>> - if (ff_get_rv_vlenb() >= 16) { >>> - if (flags & AV_CPU_FLAG_RVV_I64) { >>> - dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >>> - dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >>> - } >>> - if (flags & AV_CPU_FLAG_RVV_I32) { >>> - dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >>> - dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; >>> - } >>> + if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) { >>> + dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >>> + dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >>> + } >>> + if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { >>> + dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >>> + dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; >> >> I64 implies I32 so it is not necessary to check vlenb twice. That's what I was going for originally in my then review comments but then woopsie. > >Sure, fixed. > >FWIW I see that vc1_inv_trans_8x4_dc_rvv_i64 seems to fail the checkasm test most of the time as well. Hmm, I didn't write those optimisations but I thought I tested them before pushing. Is this subtly dependent on the vector length, maybe? Currently only 128-bit hardware is commercially available but QEMU can also emulate 256, 512 and 1014. > >// Martin >_______________________________________________ >ffmpeg-devel mailing list >ffmpeg-devel@ffmpeg.org >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >To unsubscribe, visit link above, or email >ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote: > Le 15 décembre 2023 17:39:48 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit : >> On Fri, 15 Dec 2023, Rémi Denis-Courmont wrote: >> >>> Le 15 décembre 2023 15:02:04 GMT+02:00, "Martin Storsjö" <martin@martin.st> a écrit : >>>> We can't call ff_get_rv_vlenb() if we don't have RVV available >>>> at all. >>>> >>>> Due to the SIGILL signal handler in checkasm catching it, in an >>>> unexpected place, this caused checkasm to hang instead of reporting >>>> the issue. >>>> --- >>>> libavcodec/riscv/vc1dsp_init.c | 16 +++++++--------- >>>> 1 file changed, 7 insertions(+), 9 deletions(-) >>>> >>>> diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c >>>> index 0d22d28f4d..2bb7e7fe8f 100644 >>>> --- a/libavcodec/riscv/vc1dsp_init.c >>>> +++ b/libavcodec/riscv/vc1dsp_init.c >>>> @@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp) >>>> #if HAVE_RVV >>>> int flags = av_get_cpu_flags(); >>>> >>>> - if (ff_get_rv_vlenb() >= 16) { >>>> - if (flags & AV_CPU_FLAG_RVV_I64) { >>>> - dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >>>> - dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >>>> - } >>>> - if (flags & AV_CPU_FLAG_RVV_I32) { >>>> - dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >>>> - dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; >>>> - } >>>> + if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) { >>>> + dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; >>>> + dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; >>>> + } >>>> + if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { >>>> + dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; >>>> + dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; >>> >>> I64 implies I32 so it is not necessary to check vlenb twice. That's what I was going for originally in my then review comments but then woopsie. >> >> Sure, fixed. >> >> FWIW I see that vc1_inv_trans_8x4_dc_rvv_i64 seems to fail the checkasm test most of the time as well. > > Hmm, I didn't write those optimisations but I thought I tested them > before pushing. Is this subtly dependent on the vector length, maybe? > Currently only 128-bit hardware is commercially available but QEMU can > also emulate 256, 512 and 1014. Ah, yes, it succeeds with 128 bit vectors, but fails with 256 bit. // Martin
diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c index 0d22d28f4d..2bb7e7fe8f 100644 --- a/libavcodec/riscv/vc1dsp_init.c +++ b/libavcodec/riscv/vc1dsp_init.c @@ -35,15 +35,13 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp) #if HAVE_RVV int flags = av_get_cpu_flags(); - if (ff_get_rv_vlenb() >= 16) { - if (flags & AV_CPU_FLAG_RVV_I64) { - dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; - dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; - } - if (flags & AV_CPU_FLAG_RVV_I32) { - dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; - dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; - } + if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) { + dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; + dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; + } + if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { + dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; + dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; } #endif }