diff mbox series

[FFmpeg-devel,1/2] lavu/bswap: remove some inline assembler

Message ID 20240607181947.330026-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/2] lavu/bswap: remove some inline assembler | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont June 7, 2024, 6:19 p.m. UTC
C code or compiler built-ins are preferable over inline assembler for
byte-swaps as it allows for better optimisations (e.g. instruction
scheduling) which would otherwise be impossible.

As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
this removes the inline assembler on GCC (and Clang) since we now
require recent enough compiler versions (this indeed seems to work on
AArch64).
---
 libavutil/aarch64/bswap.h | 56 ---------------------------------------
 libavutil/avr32/bswap.h   | 44 ------------------------------
 libavutil/bswap.h         |  8 +-----
 libavutil/sh4/bswap.h     | 48 ---------------------------------
 4 files changed, 1 insertion(+), 155 deletions(-)
 delete mode 100644 libavutil/aarch64/bswap.h
 delete mode 100644 libavutil/avr32/bswap.h
 delete mode 100644 libavutil/sh4/bswap.h

Comments

Sean McGovern June 7, 2024, 9:17 p.m. UTC | #1
On Fri, Jun 7, 2024 at 2:20 PM Rémi Denis-Courmont <remi@remlab.net> wrote:
>
> C code or compiler built-ins are preferable over inline assembler for
> byte-swaps as it allows for better optimisations (e.g. instruction
> scheduling) which would otherwise be impossible.
>
> As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> this removes the inline assembler on GCC (and Clang) since we now
> require recent enough compiler versions (this indeed seems to work on
> AArch64).
> ---
>  libavutil/aarch64/bswap.h | 56 ---------------------------------------
>  libavutil/avr32/bswap.h   | 44 ------------------------------
>  libavutil/bswap.h         |  8 +-----
>  libavutil/sh4/bswap.h     | 48 ---------------------------------
>  4 files changed, 1 insertion(+), 155 deletions(-)
>  delete mode 100644 libavutil/aarch64/bswap.h
>  delete mode 100644 libavutil/avr32/bswap.h
>  delete mode 100644 libavutil/sh4/bswap.h
>
> diff --git a/libavutil/aarch64/bswap.h b/libavutil/aarch64/bswap.h
> deleted file mode 100644
> index 7abca657ba..0000000000
> --- a/libavutil/aarch64/bswap.h
> +++ /dev/null
> @@ -1,56 +0,0 @@
> -/*
> - * This file is part of FFmpeg.
> - *
> - * FFmpeg is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU Lesser General Public
> - * License as published by the Free Software Foundation; either
> - * version 2.1 of the License, or (at your option) any later version.
> - *
> - * FFmpeg is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * Lesser General Public License for more details.
> - *
> - * You should have received a copy of the GNU Lesser General Public
> - * License along with FFmpeg; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - */
> -
> -#ifndef AVUTIL_AARCH64_BSWAP_H
> -#define AVUTIL_AARCH64_BSWAP_H
> -
> -#include <stdint.h>
> -#include "config.h"
> -#include "libavutil/attributes.h"
> -
> -#if HAVE_INLINE_ASM
> -
> -#define av_bswap16 av_bswap16
> -static av_always_inline av_const unsigned av_bswap16(unsigned x)
> -{
> -    unsigned y;
> -
> -    __asm__("rev16 %w0, %w1" : "=r"(y) : "r"(x));
> -    return y;
> -}
> -
> -#define av_bswap32 av_bswap32
> -static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
> -{
> -    uint32_t y;
> -
> -    __asm__("rev %w0, %w1" : "=r"(y) : "r"(x));
> -    return y;
> -}
> -
> -#define av_bswap64 av_bswap64
> -static av_always_inline av_const uint64_t av_bswap64(uint64_t x)
> -{
> -    uint64_t y;
> -
> -    __asm__("rev %0, %1" : "=r"(y) : "r"(x));
> -    return y;
> -}
> -
> -#endif /* HAVE_INLINE_ASM */
> -#endif /* AVUTIL_AARCH64_BSWAP_H */
> diff --git a/libavutil/avr32/bswap.h b/libavutil/avr32/bswap.h
> deleted file mode 100644
> index e79d53f369..0000000000
> --- a/libavutil/avr32/bswap.h
> +++ /dev/null
> @@ -1,44 +0,0 @@
> -/*
> - * This file is part of FFmpeg.
> - *
> - * FFmpeg is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU Lesser General Public
> - * License as published by the Free Software Foundation; either
> - * version 2.1 of the License, or (at your option) any later version.
> - *
> - * FFmpeg is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * Lesser General Public License for more details.
> - *
> - * You should have received a copy of the GNU Lesser General Public
> - * License along with FFmpeg; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - */
> -
> -#ifndef AVUTIL_AVR32_BSWAP_H
> -#define AVUTIL_AVR32_BSWAP_H
> -
> -#include <stdint.h>
> -#include "config.h"
> -#include "libavutil/attributes.h"
> -
> -#if HAVE_INLINE_ASM
> -
> -#define av_bswap16 av_bswap16
> -static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
> -{
> -    __asm__ ("swap.bh %0" : "+r"(x));
> -    return x;
> -}
> -
> -#define av_bswap32 av_bswap32
> -static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
> -{
> -    __asm__ ("swap.b  %0" : "+r"(x));
> -    return x;
> -}
> -
> -#endif /* HAVE_INLINE_ASM */
> -
> -#endif /* AVUTIL_AVR32_BSWAP_H */
> diff --git a/libavutil/bswap.h b/libavutil/bswap.h
> index 1528906f93..b339c90b9b 100644
> --- a/libavutil/bswap.h
> +++ b/libavutil/bswap.h
> @@ -34,16 +34,10 @@
>
>  #include "config.h"
>
> -#if   ARCH_AARCH64
> -#   include "aarch64/bswap.h"
> -#elif ARCH_ARM
> +#if ARCH_ARM
>  #   include "arm/bswap.h"
> -#elif ARCH_AVR32
> -#   include "avr32/bswap.h"
>  #elif ARCH_RISCV
>  #   include "riscv/bswap.h"
> -#elif ARCH_SH4
> -#   include "sh4/bswap.h"
>  #elif ARCH_X86
>  #   include "x86/bswap.h"
>  #endif
> diff --git a/libavutil/sh4/bswap.h b/libavutil/sh4/bswap.h
> deleted file mode 100644
> index 48dd27f806..0000000000
> --- a/libavutil/sh4/bswap.h
> +++ /dev/null
> @@ -1,48 +0,0 @@
> -/*
> - * This file is part of FFmpeg.
> - *
> - * FFmpeg is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU Lesser General Public
> - * License as published by the Free Software Foundation; either
> - * version 2.1 of the License, or (at your option) any later version.
> - *
> - * FFmpeg is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> - * Lesser General Public License for more details.
> - *
> - * You should have received a copy of the GNU Lesser General Public
> - * License along with FFmpeg; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - */
> -
> -/**
> - * @file
> - * byte swapping routines
> - */
> -
> -#ifndef AVUTIL_SH4_BSWAP_H
> -#define AVUTIL_SH4_BSWAP_H
> -
> -#include <stdint.h>
> -#include "config.h"
> -#include "libavutil/attributes.h"
> -
> -#define av_bswap16 av_bswap16
> -static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
> -{
> -    __asm__("swap.b %0,%0" : "+r"(x));
> -    return x;
> -}
> -
> -#define av_bswap32 av_bswap32
> -static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
> -{
> -    __asm__("swap.b %0,%0\n"
> -            "swap.w %0,%0\n"
> -            "swap.b %0,%0\n"
> -            : "+r"(x));
> -    return x;
> -}
> -
> -#endif /* AVUTIL_SH4_BSWAP_H */
> --
> 2.45.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

This also looks good to me.
Michael Niedermayer June 11, 2024, 1:15 p.m. UTC | #2
On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
> C code or compiler built-ins are preferable over inline assembler for
> byte-swaps as it allows for better optimisations (e.g. instruction
> scheduling) which would otherwise be impossible.
> 
> As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> this removes the inline assembler on GCC (and Clang) since we now
> require recent enough compiler versions (this indeed seems to work on
> AArch64).
> ---
>  libavutil/aarch64/bswap.h | 56 ---------------------------------------
>  libavutil/avr32/bswap.h   | 44 ------------------------------
>  libavutil/bswap.h         |  8 +-----
>  libavutil/sh4/bswap.h     | 48 ---------------------------------

As you are writing that this is preferable for better optimisations,
please provide benchmarks (for sh4, avr32)

thx

[...]
Rémi Denis-Courmont June 11, 2024, 3:28 p.m. UTC | #3
Le tiistaina 11. kesäkuuta 2024, 16.15.19 EEST Michael Niedermayer a écrit :
> On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
> > C code or compiler built-ins are preferable over inline assembler for
> > byte-swaps as it allows for better optimisations (e.g. instruction
> > scheduling) which would otherwise be impossible.
> > 
> > As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> > this removes the inline assembler on GCC (and Clang) since we now
> > require recent enough compiler versions (this indeed seems to work on
> > AArch64).
> > ---
> > 
> >  libavutil/aarch64/bswap.h | 56 ---------------------------------------
> >  libavutil/avr32/bswap.h   | 44 ------------------------------
> >  libavutil/bswap.h         |  8 +-----
> >  libavutil/sh4/bswap.h     | 48 ---------------------------------
> 
> As you are writing that this preferrable for better optimisations
> Please provide benchmarks (for sh4, avr32)

How would someone benchmark an architecture like AVR32 that is not just dead 
but barely even commercially existed at all, and for which there exist no 
known C11 compiler and thus cannot even compile FFmpeg?

That toxic attitude of yours is very demotivating, and not just to me.
James Almer June 11, 2024, 3:38 p.m. UTC | #4
On 6/11/2024 10:15 AM, Michael Niedermayer wrote:
> On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
>> C code or compiler built-ins are preferable over inline assembler for
>> byte-swaps as it allows for better optimisations (e.g. instruction
>> scheduling) which would otherwise be impossible.
>>
>> As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
>> this removes the inline assembler on GCC (and Clang) since we now
>> require recent enough compiler versions (this indeed seems to work on
>> AArch64).
>> ---
>>   libavutil/aarch64/bswap.h | 56 ---------------------------------------
>>   libavutil/avr32/bswap.h   | 44 ------------------------------
>>   libavutil/bswap.h         |  8 +-----
>>   libavutil/sh4/bswap.h     | 48 ---------------------------------
> 
> As you are writing that this preferrable for better optimisations
> Please provide benchmarks (for sh4, avr32)

This is a ridiculous request, considering nobody has such hardware at all.

> 
> thx
> 
> [...]
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Tomas Härdin June 11, 2024, 3:50 p.m. UTC | #5
tis 2024-06-11 klockan 12:38 -0300 skrev James Almer:
> On 6/11/2024 10:15 AM, Michael Niedermayer wrote:
> > On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont
> > wrote:
> > > C code or compiler built-ins are preferable over inline assembler
> > > for
> > > byte-swaps as it allows for better optimisations (e.g.
> > > instruction
> > > scheduling) which would otherwise be impossible.
> > > 
> > > As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> > > this removes the inline assembler on GCC (and Clang) since we now
> > > require recent enough compiler versions (this indeed seems to
> > > work on
> > > AArch64).
> > > ---
> > >   libavutil/aarch64/bswap.h | 56 --------------------------------
> > > -------
> > >   libavutil/avr32/bswap.h   | 44 ------------------------------
> > >   libavutil/bswap.h         |  8 +-----
> > >   libavutil/sh4/bswap.h     | 48 --------------------------------
> > > -
> > 
> > As you are writing that this preferrable for better optimisations
> > Please provide benchmarks (for sh4, avr32)
> 
> This is a ridiculous request, considering nobody has such hardware at
> all.

Maybe Måns has? He's the one who added the AVR32 code. The SH4 code was
added all the way back in 2003 in 0c6bd2ea by someone who goes by BERO.

Perhaps we should demand platforms for which we have asm also have FATE
instances?

/Tomas
Michael Niedermayer June 11, 2024, 3:57 p.m. UTC | #6
On Tue, Jun 11, 2024 at 12:38:37PM -0300, James Almer wrote:
> On 6/11/2024 10:15 AM, Michael Niedermayer wrote:
> > On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
> > > C code or compiler built-ins are preferable over inline assembler for
> > > byte-swaps as it allows for better optimisations (e.g. instruction
> > > scheduling) which would otherwise be impossible.
> > > 
> > > As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> > > this removes the inline assembler on GCC (and Clang) since we now
> > > require recent enough compiler versions (this indeed seems to work on
> > > AArch64).
> > > ---
> > >   libavutil/aarch64/bswap.h | 56 ---------------------------------------
> > >   libavutil/avr32/bswap.h   | 44 ------------------------------
> > >   libavutil/bswap.h         |  8 +-----
> > >   libavutil/sh4/bswap.h     | 48 ---------------------------------
> > 
> > As you are writing that this preferrable for better optimisations
> > Please provide benchmarks (for sh4, avr32)
> 
> This is a ridiculous request, considering nobody has such hardware at all.

Then I think it's a ridiculous claim that this optimizes the code.

I mean, at some point there was hardware and these optimisations did improve
speed.

This patch is not removing the code because it's a rare (or dead) platform; it removes
it with the claim that this "allows for better optimisations".
I am sorry, but I do not see why asking for the claim in the commit message
to be backed up with facts is ridiculous.
The claim in the commit message may be ridiculous.

thx

[...]
Paul B Mahol June 11, 2024, 3:59 p.m. UTC | #7
On Tue, Jun 11, 2024 at 5:57 PM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Tue, Jun 11, 2024 at 12:38:37PM -0300, James Almer wrote:
> > On 6/11/2024 10:15 AM, Michael Niedermayer wrote:
> > > On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
> > > > C code or compiler built-ins are preferable over inline assembler for
> > > > byte-swaps as it allows for better optimisations (e.g. instruction
> > > > scheduling) which would otherwise be impossible.
> > > >
> > > > As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> > > > this removes the inline assembler on GCC (and Clang) since we now
> > > > require recent enough compiler versions (this indeed seems to work on
> > > > AArch64).
> > > > ---
> > > >   libavutil/aarch64/bswap.h | 56
> ---------------------------------------
> > > >   libavutil/avr32/bswap.h   | 44 ------------------------------
> > > >   libavutil/bswap.h         |  8 +-----
> > > >   libavutil/sh4/bswap.h     | 48 ---------------------------------
> > >
> > > As you are writing that this preferrable for better optimisations
> > > Please provide benchmarks (for sh4, avr32)
> >
> > This is a ridiculous request, considering nobody has such hardware at
> all.
>
> Then I think its a ridiculous claim that this optimizes the code
>
> I mean, at some point there was hardware and these optimisations did
> improve
> speed.
>
> This patch is not removing the code because its a rare (or dead) platform,
> it removes
> it with the claim that this would "allows for better optimisations"
> Iam sorry but i do not see why asking for the claim in the commit message
> to be backed up with facts being ridiculous
> The claim in the commit message may be ridiculous
>

But at the same time, keeping sonic, the state of the art in audio codec
compression, maintaining it, and responding to high user demand for its new
features is a virtue of this project.


>
> thx
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> No great genius has ever existed without some touch of madness. --
> Aristotle
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Michael Niedermayer June 11, 2024, 4:04 p.m. UTC | #8
On Tue, Jun 11, 2024 at 06:28:30PM +0300, Rémi Denis-Courmont wrote:
> Le tiistaina 11. kesäkuuta 2024, 16.15.19 EEST Michael Niedermayer a écrit :
> > On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
> > > C code or compiler built-ins are preferable over inline assembler for
> > > byte-swaps as it allows for better optimisations (e.g. instruction
> > > scheduling) which would otherwise be impossible.
> > > 
> > > As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> > > this removes the inline assembler on GCC (and Clang) since we now
> > > require recent enough compiler versions (this indeed seems to work on
> > > AArch64).
> > > ---
> > > 
> > >  libavutil/aarch64/bswap.h | 56 ---------------------------------------
> > >  libavutil/avr32/bswap.h   | 44 ------------------------------
> > >  libavutil/bswap.h         |  8 +-----
> > >  libavutil/sh4/bswap.h     | 48 ---------------------------------
> > 
> > As you are writing that this preferrable for better optimisations
> > Please provide benchmarks (for sh4, avr32)
> 
> How would someone benchmark an architecture like AVR32 that is not just dead 
> but barely even commercially existed at all, and for which there exist no 
> known C11 compiler and thus cannot even compile FFmpeg?

Then simply remove avr32 with that explanation (no C11 compiler, and any other
reason).

But if a commit message says the code is removed because that "allows for better optimisations",
then yes, I ask for benchmarks.

thx

[...]
James Almer June 11, 2024, 4:08 p.m. UTC | #9
On 6/11/2024 12:57 PM, Michael Niedermayer wrote:
> On Tue, Jun 11, 2024 at 12:38:37PM -0300, James Almer wrote:
>> On 6/11/2024 10:15 AM, Michael Niedermayer wrote:
>>> On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
>>>> C code or compiler built-ins are preferable over inline assembler for
>>>> byte-swaps as it allows for better optimisations (e.g. instruction
>>>> scheduling) which would otherwise be impossible.
>>>>
>>>> As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
>>>> this removes the inline assembler on GCC (and Clang) since we now
>>>> require recent enough compiler versions (this indeed seems to work on
>>>> AArch64).
>>>> ---
>>>>    libavutil/aarch64/bswap.h | 56 ---------------------------------------
>>>>    libavutil/avr32/bswap.h   | 44 ------------------------------
>>>>    libavutil/bswap.h         |  8 +-----
>>>>    libavutil/sh4/bswap.h     | 48 ---------------------------------
>>>
>>> As you are writing that this preferrable for better optimisations
>>> Please provide benchmarks (for sh4, avr32)
>>
>> This is a ridiculous request, considering nobody has such hardware at all.
> 
> Then I think its a ridiculous claim that this optimizes the code
> 
> I mean, at some point there was hardware and these optimisations did improve
> speed.
> 
> This patch is not removing the code because its a rare (or dead) platform, it removes
> it with the claim that this would "allows for better optimisations"
> Iam sorry but i do not see why asking for the claim in the commit message
> to be backed up with facts being ridiculous
> The claim in the commit message may be ridiculous

Compilers have come a long way since 20 years ago when this code was added.
See https://godbolt.org/z/jPose4rj3, where new GCC generates the same 
code for sh4. And no inline assembly means instruction scheduling will 
take these functions into account.

> 
> thx
> 
> [...]
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Michael Niedermayer June 11, 2024, 4:10 p.m. UTC | #10
On Tue, Jun 11, 2024 at 05:50:35PM +0200, Tomas Härdin wrote:
[...]
> Perhaps we should demand platforms for which we have asm also have FATE
> instances?

We have QEMU-based FATE for sh-4:
https://fate.ffmpeg.org/?query=subarch:sh4%2F%2F

thx

[...]
Michael Niedermayer June 11, 2024, 4:17 p.m. UTC | #11
On Tue, Jun 11, 2024 at 01:08:04PM -0300, James Almer wrote:
> On 6/11/2024 12:57 PM, Michael Niedermayer wrote:
> > On Tue, Jun 11, 2024 at 12:38:37PM -0300, James Almer wrote:
> > > On 6/11/2024 10:15 AM, Michael Niedermayer wrote:
> > > > On Fri, Jun 07, 2024 at 09:19:46PM +0300, Rémi Denis-Courmont wrote:
> > > > > C code or compiler built-ins are preferable over inline assembler for
> > > > > byte-swaps as it allows for better optimisations (e.g. instruction
> > > > > scheduling) which would otherwise be impossible.
> > > > > 
> > > > > As with f64c2e710fa1a7b59753224e717f57c48462076f for x86 and Arm,
> > > > > this removes the inline assembler on GCC (and Clang) since we now
> > > > > require recent enough compiler versions (this indeed seems to work on
> > > > > AArch64).
> > > > > ---
> > > > >    libavutil/aarch64/bswap.h | 56 ---------------------------------------
> > > > >    libavutil/avr32/bswap.h   | 44 ------------------------------
> > > > >    libavutil/bswap.h         |  8 +-----
> > > > >    libavutil/sh4/bswap.h     | 48 ---------------------------------
> > > > 
> > > > As you are writing that this preferrable for better optimisations
> > > > Please provide benchmarks (for sh4, avr32)
> > > 
> > > This is a ridiculous request, considering nobody has such hardware at all.
> > 
> > Then I think its a ridiculous claim that this optimizes the code
> > 
> > I mean, at some point there was hardware and these optimisations did improve
> > speed.
> > 
> > This patch is not removing the code because its a rare (or dead) platform, it removes
> > it with the claim that this would "allows for better optimisations"
> > Iam sorry but i do not see why asking for the claim in the commit message
> > to be backed up with facts being ridiculous
> > The claim in the commit message may be ridiculous
> 
> Compilers have come a long way since 20 years ago when this code was added.
> See https://godbolt.org/z/jPose4rj3, where new GCC generates the same code
> for sh4. And no inline assembly means instruction scheduling will take these
> functions into account.

Thanks for checking.
Please add a note to the commit message that this was checked for sh-4;
that resolves my concern about sh-4.

thx

[...]
Rémi Denis-Courmont June 11, 2024, 4:24 p.m. UTC | #12
Le tiistaina 11. kesäkuuta 2024, 19.10.04 EEST Michael Niedermayer a écrit :
> On Tue, Jun 11, 2024 at 05:50:35PM +0200, Tomas Härdin wrote:
> [...]
> 
> > Perhaps we should demand platforms for which we have asm also have FATE
> > instances?
> 
> qemu based fate we have for sh-4:
> https://fate.ffmpeg.org/?query=subarch:sh4%2F%2F

Are you seriously suggesting to use QEMU TCG for benchmarking? As someone who 
has contributed to QEMU a little, I daresay that this is insane.
Rémi Denis-Courmont June 11, 2024, 4:27 p.m. UTC | #13
Le tiistaina 11. kesäkuuta 2024, 19.04.17 EEST Michael Niedermayer a écrit :
> then simply remove avr32 with that explanation (no C11 compiler, and any
> other reason)

No. Måns's and my optimisation arguments stand, even if they are purely
hypothetical in the case of AVR32 (for which there is no working compiler). It
is a *general* argument.

Removing the AVR32 support is not the point of *this* patch, so you are asking 
me to misrepresent what the patch does and why. As for SH4, James already 
addressed that.

> but if a commit message says the code is removed because that "allows for
> better optimisations" then yes i ask for benchmarks

"Allows for better optimisations" means exactly that: enable compilers to 
*potentially* optimise better. I never claimed that it actually improved 
performance in any given particular case.

Nevertheless it will make performance worse in one and only one case: a 
defective/half-baked compiler: missing the byte-swap instruction (if it 
exists) and/or a proper scheduling model, for the target. In other words, you 
are essentially arguing that FFmpeg should be optimised for bad C compilers 
instead of good ones.
Tomas Härdin June 11, 2024, 5:20 p.m. UTC | #14
tis 2024-06-11 klockan 18:10 +0200 skrev Michael Niedermayer:
> On Tue, Jun 11, 2024 at 05:50:35PM +0200, Tomas Härdin wrote:
> [...]
> > Perhaps we should demand platforms for which we have asm also have
> > FATE
> > instances?
> 
> qemu based fate we have for sh-4:
> https://fate.ffmpeg.org/?query=subarch:sh4%2F%2F

I think we need actual machines, and actual users that want to run on
those machines, else we're just doing mental self-gratification

/Tomas
diff mbox series

Patch

diff --git a/libavutil/aarch64/bswap.h b/libavutil/aarch64/bswap.h
deleted file mode 100644
index 7abca657ba..0000000000
--- a/libavutil/aarch64/bswap.h
+++ /dev/null
@@ -1,56 +0,0 @@ 
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_AARCH64_BSWAP_H
-#define AVUTIL_AARCH64_BSWAP_H
-
-#include <stdint.h>
-#include "config.h"
-#include "libavutil/attributes.h"
-
-#if HAVE_INLINE_ASM
-
-#define av_bswap16 av_bswap16
-static av_always_inline av_const unsigned av_bswap16(unsigned x)
-{
-    unsigned y;
-
-    __asm__("rev16 %w0, %w1" : "=r"(y) : "r"(x));
-    return y;
-}
-
-#define av_bswap32 av_bswap32
-static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
-{
-    uint32_t y;
-
-    __asm__("rev %w0, %w1" : "=r"(y) : "r"(x));
-    return y;
-}
-
-#define av_bswap64 av_bswap64
-static av_always_inline av_const uint64_t av_bswap64(uint64_t x)
-{
-    uint64_t y;
-
-    __asm__("rev %0, %1" : "=r"(y) : "r"(x));
-    return y;
-}
-
-#endif /* HAVE_INLINE_ASM */
-#endif /* AVUTIL_AARCH64_BSWAP_H */
diff --git a/libavutil/avr32/bswap.h b/libavutil/avr32/bswap.h
deleted file mode 100644
index e79d53f369..0000000000
--- a/libavutil/avr32/bswap.h
+++ /dev/null
@@ -1,44 +0,0 @@ 
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_AVR32_BSWAP_H
-#define AVUTIL_AVR32_BSWAP_H
-
-#include <stdint.h>
-#include "config.h"
-#include "libavutil/attributes.h"
-
-#if HAVE_INLINE_ASM
-
-#define av_bswap16 av_bswap16
-static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
-{
-    __asm__ ("swap.bh %0" : "+r"(x));
-    return x;
-}
-
-#define av_bswap32 av_bswap32
-static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
-{
-    __asm__ ("swap.b  %0" : "+r"(x));
-    return x;
-}
-
-#endif /* HAVE_INLINE_ASM */
-
-#endif /* AVUTIL_AVR32_BSWAP_H */
diff --git a/libavutil/bswap.h b/libavutil/bswap.h
index 1528906f93..b339c90b9b 100644
--- a/libavutil/bswap.h
+++ b/libavutil/bswap.h
@@ -34,16 +34,10 @@ 
 
 #include "config.h"
 
-#if   ARCH_AARCH64
-#   include "aarch64/bswap.h"
-#elif ARCH_ARM
+#if ARCH_ARM
 #   include "arm/bswap.h"
-#elif ARCH_AVR32
-#   include "avr32/bswap.h"
 #elif ARCH_RISCV
 #   include "riscv/bswap.h"
-#elif ARCH_SH4
-#   include "sh4/bswap.h"
 #elif ARCH_X86
 #   include "x86/bswap.h"
 #endif
diff --git a/libavutil/sh4/bswap.h b/libavutil/sh4/bswap.h
deleted file mode 100644
index 48dd27f806..0000000000
--- a/libavutil/sh4/bswap.h
+++ /dev/null
@@ -1,48 +0,0 @@ 
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * byte swapping routines
- */
-
-#ifndef AVUTIL_SH4_BSWAP_H
-#define AVUTIL_SH4_BSWAP_H
-
-#include <stdint.h>
-#include "config.h"
-#include "libavutil/attributes.h"
-
-#define av_bswap16 av_bswap16
-static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
-{
-    __asm__("swap.b %0,%0" : "+r"(x));
-    return x;
-}
-
-#define av_bswap32 av_bswap32
-static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
-{
-    __asm__("swap.b %0,%0\n"
-            "swap.w %0,%0\n"
-            "swap.b %0,%0\n"
-            : "+r"(x));
-    return x;
-}
-
-#endif /* AVUTIL_SH4_BSWAP_H */