diff mbox series

[FFmpeg-devel,2/7] checkasm: improve print format

Message ID 20240813140338.143045-2-jdek@itanimul.li
State New
Headers show
Series [FFmpeg-devel,1/7] checkasm: add csv/tsv bench output | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

J. Dekker Aug. 13, 2024, 2:03 p.m. UTC
Port dav1d's checkasm output format to FFmpeg's checkasm. The new format
includes relative speedups and aligns results.

Signed-off-by: J. Dekker <jdek@itanimul.li>
---
 tests/checkasm/checkasm.c | 53 +++++++++++++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 5 deletions(-)

Comments

Lynne Aug. 13, 2024, 4:39 p.m. UTC | #1
On 13/08/2024 16:03, J. Dekker wrote:
> Port dav1d's checkasm output format to FFmpeg's checkasm, includes
> relative speedups and aligns results.
> 
> Signed-off-by: J. Dekker <jdek@itanimul.li>
> ---
>   tests/checkasm/checkasm.c | 53 +++++++++++++++++++++++++++++++++++----
>   1 file changed, 48 insertions(+), 5 deletions(-)
> 
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index f82ee0864f..0095758268 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -18,6 +18,31 @@
>    * You should have received a copy of the GNU General Public License along
>    * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
>    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + *
> + * Copyright © 2018, VideoLAN and dav1d authors
> + * Copyright © 2018, Two Orioles, LLC
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are met:
> + *
> + * 1. Redistributions of source code must retain the above copyright notice, this
> + *    list of conditions and the following disclaimer.
> + *
> + * 2. Redistributions in binary form must reproduce the above copyright notice,
> + *    this list of conditions and the following disclaimer in the documentation
> + *    and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
> + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>    */
>   
>   #include "config.h"
> @@ -575,6 +600,16 @@ static int measure_nop_time(void)
>       return nop_sum / 500;
>   }
>   
> +static inline double avg_cycles_per_call(const CheckasmPerf *const p)
> +{
> +    if (p->iterations) {
> +        const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time;
> +        if (cycles > 0.0)
> +            return cycles / 4.0; /* 4 calls per iteration */
> +    }
> +    return 0.0;
> +}
> +
>   /* Print benchmark results */
>   static void print_benchs(CheckasmFunc *f)
>   {
> @@ -584,17 +619,25 @@ static void print_benchs(CheckasmFunc *f)
>           /* Only print functions with at least one assembly version */
>           if (f->versions.cpu || f->versions.next) {
>               CheckasmFuncVersion *v = &f->versions;
> +            const CheckasmPerf *p = &v->perf;
> +            const double baseline = avg_cycles_per_call(p);
> +            double decicycles;
>               do {
> -                CheckasmPerf *p = &v->perf;
>                   if (p->iterations) {
> -                    int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4;
> +                    p = &v->perf;
> +                    decicycles = avg_cycles_per_call(p);
>                       if (state.csv) {
>                           const char sep = state.tsv ? '\t' : ',';
> -                        printf("%s%c%s%c%d.%d\n", f->name, sep,
> +                        printf("%s%c%s%c%.1f\n", f->name, sep,
>                                  cpu_suffix(v->cpu), sep,
> -                               decicycles / 10, decicycles % 10);
> +                               decicycles / 10.0);
>                       } else {
> -                        printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
> +                        const int pad_length = 10 + 50 -
> +                            printf("%s_%s:", f->name, cpu_suffix(v->cpu));
> +                        const double ratio = decicycles ?
> +                            baseline / decicycles : 0.0;
> +                        printf("%*.1f (%5.2fx)\n", FFMAX(pad_length, 0),
> +                            decicycles / 10.0, ratio);
>                       }
>                   }
>               } while ((v = v->next));

How does it improve it?

You're only interested in the last X iterations, after the cache has fully
warmed up and is out of the equation. Averaging the results from all
iterations would also be benchmarking the memory layout of the system,
but only the cycles are of interest.
J. Dekker Aug. 16, 2024, 10:48 a.m. UTC | #2
Lynne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> writes:

> On 13/08/2024 16:03, J. Dekker wrote:
>> Port dav1d's checkasm output format to FFmpeg's checkasm, includes
>> relative speedups and aligns results.
>> Signed-off-by: J. Dekker <jdek@itanimul.li>
>> ---
>>   tests/checkasm/checkasm.c | 53 +++++++++++++++++++++++++++++++++++----
>>   1 file changed, 48 insertions(+), 5 deletions(-)
>> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
>> index f82ee0864f..0095758268 100644
>> --- a/tests/checkasm/checkasm.c
>> +++ b/tests/checkasm/checkasm.c
>> @@ -18,6 +18,31 @@
>>    * You should have received a copy of the GNU General Public License along
>>    * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
>>    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>> + *
>> + * Copyright © 2018, VideoLAN and dav1d authors
>> + * Copyright © 2018, Two Orioles, LLC
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions are met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright notice, this
>> + *    list of conditions and the following disclaimer.
>> + *
>> + * 2. Redistributions in binary form must reproduce the above copyright notice,
>> + *    this list of conditions and the following disclaimer in the documentation
>> + *    and/or other materials provided with the distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
>> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
>> + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
>> + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
>> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
>> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
>> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>>    */
>>     #include "config.h"
>> @@ -575,6 +600,16 @@ static int measure_nop_time(void)
>>       return nop_sum / 500;
>>   }
>>   +static inline double avg_cycles_per_call(const CheckasmPerf *const p)
>> +{
>> +    if (p->iterations) {
>> +        const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time;
>> +        if (cycles > 0.0)
>> +            return cycles / 4.0; /* 4 calls per iteration */
>> +    }
>> +    return 0.0;
>> +}
>> +
>>   /* Print benchmark results */
>>   static void print_benchs(CheckasmFunc *f)
>>   {
>> @@ -584,17 +619,25 @@ static void print_benchs(CheckasmFunc *f)
>>           /* Only print functions with at least one assembly version */
>>           if (f->versions.cpu || f->versions.next) {
>>               CheckasmFuncVersion *v = &f->versions;
>> +            const CheckasmPerf *p = &v->perf;
>> +            const double baseline = avg_cycles_per_call(p);
>> +            double decicycles;
>>               do {
>> -                CheckasmPerf *p = &v->perf;
>>                   if (p->iterations) {
>> -                    int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4;
>> +                    p = &v->perf;
>> +                    decicycles = avg_cycles_per_call(p);
>>                       if (state.csv) {
>>                           const char sep = state.tsv ? '\t' : ',';
>> -                        printf("%s%c%s%c%d.%d\n", f->name, sep,
>> +                        printf("%s%c%s%c%.1f\n", f->name, sep,
>>                                  cpu_suffix(v->cpu), sep,
>> -                               decicycles / 10, decicycles % 10);
>> +                               decicycles / 10.0);
>>                       } else {
>> -                        printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
>> +                        const int pad_length = 10 + 50 -
>> +                            printf("%s_%s:", f->name, cpu_suffix(v->cpu));
>> +                        const double ratio = decicycles ?
>> +                            baseline / decicycles : 0.0;
>> +                        printf("%*.1f (%5.2fx)\n", FFMAX(pad_length, 0),
>> +                            decicycles / 10.0, ratio);
>>                       }
>>                   }
>>               } while ((v = v->next));
>
> How does it improve it?
>
> You're only interested in the last X iterations, after the cache has fully
> warmed up and is out of the equation. Averaging the results from all
> iterations would also be benchmarking the memory layout of the system, but
> only the cycles are of interest.

The semantics of how the benchmark is calculated are essentially
unchanged by this commit. The only intention is to follow dav1d's
checkasm format and include a relative speedup for easier at-a-glance
comparisons.
Lynne Aug. 19, 2024, 5:11 p.m. UTC | #3
On 16/08/2024 12:48, J. Dekker wrote:
> Lynne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> writes:
> 
>> On 13/08/2024 16:03, J. Dekker wrote:
>>> Port dav1d's checkasm output format to FFmpeg's checkasm, includes
>>> relative speedups and aligns results.
>>> Signed-off-by: J. Dekker <jdek@itanimul.li>
>>> ---
>>>    tests/checkasm/checkasm.c | 53 +++++++++++++++++++++++++++++++++++----
>>>    1 file changed, 48 insertions(+), 5 deletions(-)
>>> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
>>> index f82ee0864f..0095758268 100644
>>> --- a/tests/checkasm/checkasm.c
>>> +++ b/tests/checkasm/checkasm.c
>>> @@ -18,6 +18,31 @@
>>>     * You should have received a copy of the GNU General Public License along
>>>     * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
>>>     * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>>> + *
>>> + * Copyright © 2018, VideoLAN and dav1d authors
>>> + * Copyright © 2018, Two Orioles, LLC
>>> + * All rights reserved.
>>> + *
>>> + * Redistribution and use in source and binary forms, with or without
>>> + * modification, are permitted provided that the following conditions are met:
>>> + *
>>> + * 1. Redistributions of source code must retain the above copyright notice, this
>>> + *    list of conditions and the following disclaimer.
>>> + *
>>> + * 2. Redistributions in binary form must reproduce the above copyright notice,
>>> + *    this list of conditions and the following disclaimer in the documentation
>>> + *    and/or other materials provided with the distribution.
>>> + *
>>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
>>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
>>> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
>>> + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
>>> + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
>>> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>>> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
>>> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
>>> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>>>     */
>>>      #include "config.h"
>>> @@ -575,6 +600,16 @@ static int measure_nop_time(void)
>>>        return nop_sum / 500;
>>>    }
>>>    +static inline double avg_cycles_per_call(const CheckasmPerf *const p)
>>> +{
>>> +    if (p->iterations) {
>>> +        const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time;
>>> +        if (cycles > 0.0)
>>> +            return cycles / 4.0; /* 4 calls per iteration */
>>> +    }
>>> +    return 0.0;
>>> +}
>>> +
>>>    /* Print benchmark results */
>>>    static void print_benchs(CheckasmFunc *f)
>>>    {
>>> @@ -584,17 +619,25 @@ static void print_benchs(CheckasmFunc *f)
>>>            /* Only print functions with at least one assembly version */
>>>            if (f->versions.cpu || f->versions.next) {
>>>                CheckasmFuncVersion *v = &f->versions;
>>> +            const CheckasmPerf *p = &v->perf;
>>> +            const double baseline = avg_cycles_per_call(p);
>>> +            double decicycles;
>>>                do {
>>> -                CheckasmPerf *p = &v->perf;
>>>                    if (p->iterations) {
>>> -                    int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4;
>>> +                    p = &v->perf;
>>> +                    decicycles = avg_cycles_per_call(p);
>>>                        if (state.csv) {
>>>                            const char sep = state.tsv ? '\t' : ',';
>>> -                        printf("%s%c%s%c%d.%d\n", f->name, sep,
>>> +                        printf("%s%c%s%c%.1f\n", f->name, sep,
>>>                                   cpu_suffix(v->cpu), sep,
>>> -                               decicycles / 10, decicycles % 10);
>>> +                               decicycles / 10.0);
>>>                        } else {
>>> -                        printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
>>> +                        const int pad_length = 10 + 50 -
>>> +                            printf("%s_%s:", f->name, cpu_suffix(v->cpu));
>>> +                        const double ratio = decicycles ?
>>> +                            baseline / decicycles : 0.0;
>>> +                        printf("%*.1f (%5.2fx)\n", FFMAX(pad_length, 0),
>>> +                            decicycles / 10.0, ratio);
>>>                        }
>>>                    }
>>>                } while ((v = v->next));
>>
>> How does it improve it?
>>
>> You're only interested in the last X iterations, after the cache has fully
>> warmed up and is out of the equation. Averaging the results from all
>> iterations would also be benchmarking the memory layout of the system, but
>> only the cycles are of interest.
> 
> The semantics of how the benchmark is calculated are essentially
> unchanged by this commit. The only intention is to follow dav1d's
> checkasm format and include a relative speedup for easier at-a-glance
> comparisons.

After talking it through on IRC, I have no issues with the patch.
diff mbox series

Patch

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index f82ee0864f..0095758268 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -18,6 +18,31 @@ 
  * You should have received a copy of the GNU General Public License along
  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "config.h"
@@ -575,6 +600,16 @@  static int measure_nop_time(void)
     return nop_sum / 500;
 }
 
+static inline double avg_cycles_per_call(const CheckasmPerf *const p)
+{
+    if (p->iterations) {
+        const double cycles = (double)(10 * p->cycles) / p->iterations - state.nop_time;
+        if (cycles > 0.0)
+            return cycles / 4.0; /* 4 calls per iteration */
+    }
+    return 0.0;
+}
+
 /* Print benchmark results */
 static void print_benchs(CheckasmFunc *f)
 {
@@ -584,17 +619,25 @@  static void print_benchs(CheckasmFunc *f)
         /* Only print functions with at least one assembly version */
         if (f->versions.cpu || f->versions.next) {
             CheckasmFuncVersion *v = &f->versions;
+            const CheckasmPerf *p = &v->perf;
+            const double baseline = avg_cycles_per_call(p);
+            double decicycles;
             do {
-                CheckasmPerf *p = &v->perf;
                 if (p->iterations) {
-                    int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4;
+                    p = &v->perf;
+                    decicycles = avg_cycles_per_call(p);
                     if (state.csv) {
                         const char sep = state.tsv ? '\t' : ',';
-                        printf("%s%c%s%c%d.%d\n", f->name, sep,
+                        printf("%s%c%s%c%.1f\n", f->name, sep,
                                cpu_suffix(v->cpu), sep,
-                               decicycles / 10, decicycles % 10);
+                               decicycles / 10.0);
                     } else {
-                        printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
+                        const int pad_length = 10 + 50 -
+                            printf("%s_%s:", f->name, cpu_suffix(v->cpu));
+                        const double ratio = decicycles ?
+                            baseline / decicycles : 0.0;
+                        printf("%*.1f (%5.2fx)\n", FFMAX(pad_length, 0),
+                            decicycles / 10.0, ratio);
                     }
                 }
             } while ((v = v->next));