diff mbox

[FFmpeg-devel] beautified + accelerated vf_fillborders – Please review

Message ID ab2c702d-568e-a587-87ec-5d0031c92ecf@CoSoCo.de
State Superseded
Headers show

Commit Message

Ulf Zibis March 19, 2019, 2:57 p.m. UTC
Hi again,

Am 12.03.19 um 00:37 schrieb Carl Eugen Hoyos:
> 2019-03-12 0:25 GMT+01:00, Moritz Barsnick <barsnick@gmx.net>:
>> Ideally, you use the START_TIMER/STOP_TIMER macros to
>> profile the actual functions you changed. (Check this mailing list's
>> archives for some examples, and play with the code.)
> But this should not be needed if time (the command) and / or
> benchmark (the FFmpeg option) show clear improvements.

With the benchmark option I cannot see the time for the filter, just
for the de/encoding, and as I assume that this filter is much faster
than the de/encoding around it, I suspect the overall time will not be helpful.

So I have "played" with the START_TIMER/STOP_TIMER macros.

Now I'm kind of helpless, as the numbers I get are varying in wide
range. It seems, that my changes help a little for e.g. "-vf
fillborders:0:0:5:5:mirror". This is what I expected by bypassing the
code loops for the right/left borders, when there is nothing to do, but
the timer results are "noisy".

I attach the patches for the first 2 chunks again, and also the patches
for my timed version. Hopefully you have the time to play a little with
that and can give me hints, how I could get more reliable numbers. (I
just had closed all other applications like Firefox, Transmission etc.
before running the benchmarks)

-Ulf

==========================================================================
$ debug/fillborders.sh
Test[0] ======> 3-plane 8-bit YUV-colour:    CYD_1005.jpg <======
./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-0-0-5-5.jpg
 122670 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 133020 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 119430 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 118350 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 124740 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 122130 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p2 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-0-0-5-5.jpg
 118800 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 123840 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 121500 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 135090 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 126270 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 125730 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-0-0.jpg
 557730 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 614880 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 598410 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 545940 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 591030 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 566910 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p2 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-0-0.jpg
 542430 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 567900 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 490050 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 579330 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 521370 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 890370 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-5-5.jpg
 576540 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 597060 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 599940 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 621900 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 588870 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 606600 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p2 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-5-5.jpg
 522090 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 655650 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 609660 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 600300 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 561510 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 630090 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
=====================================================================================
$ debug/fillborders.sh
Test[0] ======> 3-plane 8-bit YUV-colour:    CYD_1005.jpg <======
./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-0-0-5-5.jpg
 131220 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 141030 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 135900 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 133380 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 148230 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 119880 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p2 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-0-0-5-5.jpg
 165870 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 120960 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 126450 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 122310 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 132660 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 122940 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-0-0.jpg
 578160 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 571140 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 652320 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 571500 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 756810 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 515880 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p2 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-0-0.jpg
 625140 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 595260 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 552600 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 636390 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 687960 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
 648900 decicycles in fillborders=5:5:0:0:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-5-5.jpg
 578610 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 552060 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 604980 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 486900 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 498780 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 549900 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
./ffmpeg-p2 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-5-5-5-5.jpg
 642240 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 658710 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
1701630 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 676350 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 622350 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
 693630 decicycles in fillborders=5:5:5:5:mirror 3p-8bit-1x1,       1
runs,      0 skips
==============================================================================

Comments

Carl Eugen Hoyos March 19, 2019, 4:31 p.m. UTC | #1
2019-03-19 15:57 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:

> $ debug/fillborders.sh
> Test[0] ======> 3-plane 8-bit YUV-colour:    CYD_1005.jpg <======
> ./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-0-0-5-5.jpg

This does not look like a command line but to avoid the encoding
time, "-f null -" can be used.

>  122670 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,
> 1 runs,      0 skips

One run is not good.
Either use the loop option to filter the same frame again and
again or feed a video to ffmpeg.

Carl Eugen
Ulf Zibis March 19, 2019, 7:57 p.m. UTC | #2
Am 19.03.19 um 17:31 schrieb Carl Eugen Hoyos:
>> $ debug/fillborders.sh
>> Test[0] ======> 3-plane 8-bit YUV-colour:    CYD_1005.jpg <======
>> ./ffmpeg-p1 : CYD_1005.jpg --> ZZ_CYD_1005_mirror-0-0-5-5.jpg
> This does not look like a command line

The command line is in the script file debug/fillborders.sh.
This echo comment line with ./ffmpeg-p1 means that the following runs
were done with the build from patch 1 and with ./ffmpeg-p2 from patch 2
for comparison.

>  but to avoid the encoding
> time, "-f null -" can be used.
Do you mean this as an answer for using the -benchmark option? Thanks for the
hint. But the CPU time for the decoding would still be there, which, I'm
afraid, would far outweigh the little CPU time for the filter.

>>  122670 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,
>> 1 runs,      0 skips
> One run is not good.
I did 6 runs for each command line pattern by loop in
debug/fillborders.sh (included in vf_fillbd_benchmark_2.patch).

> Either use the loop option to filter the same frame again and
> again or feed a video to ffmpeg.

Ok, I'll try this.

-Ulf
Ulf Zibis March 19, 2019, 8:44 p.m. UTC | #3
Am 19.03.19 um 17:31 schrieb Carl Eugen Hoyos:
> This does not look like a command line but to avoid the encoding
> time, "-f null -" can be used.
>
>>  122670 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,
>> 1 runs,      0 skips
> One run is not good.
> Either use the loop option to filter the same frame again and
> again or feed a video to ffmpeg.
With:
./ffmpeg -y -v error -stream_loop 100 -i input.jpg -vf
fillborders=5:5:5:5:mirror -f null -
I still see only 1 run. What am I doing wrong?

-Ulf
Ulf Zibis March 25, 2019, 10:13 a.m. UTC | #4
Hi again,

Am 19.03.19 um 21:44 schrieb Ulf Zibis:
> Am 19.03.19 um 17:31 schrieb Carl Eugen Hoyos:
>>>  122670 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,
>>> 1 runs,      0 skips
>> One run is not good.
>> Either use the loop option to filter the same frame again and
>> again or feed a video to ffmpeg.
> With:
> ./ffmpeg -y -v error -stream_loop 100 -i input.jpg -vf
> fillborders=5:5:5:5:mirror -f null -
> I still see only 1 run. What I'm doing wrong?
As I was not able to find a loop option, I used -stream_loop. Now I'm
wondering why it doesn't work as I expect.

Do I misinterpret the purpose of -stream_loop?

-Ulf
Ulf Zibis March 26, 2019, 2:42 p.m. UTC | #5
Hi again,

Am 19.03.19 um 17:31 schrieb Carl Eugen Hoyos:
>>  122670 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,
>> 1 runs,      0 skips
> One run is not good.
> Either use the loop option to filter the same frame again and
> again or feed a video to ffmpeg.

Do you mean the following option? Unfortunately I still see only 1 run.

I know that it works with "-vf loop=loop=1024:size=1:start=0", but I
ask, because I want to understand the purpose of the shorter option
"-loop number".

./ffmpeg-p7b -y -i debug/8.jpg -loop 1024 -vf
fillborders=25:25:25:25:mirror debug/ZZ_8_mirror-25-25-25-25.jpg
ffmpeg version N-93458-g18429ce896 Copyright (c) 2000-2019 the FFmpeg
developers
  built with gcc 7 (Ubuntu 7.3.0-27ubuntu1~18.04)
  configuration:
  libavutil      56. 26.100 / 56. 26.100
  libavcodec     58. 47.105 / 58. 47.105
  libavformat    58. 26.101 / 58. 26.101
  libavdevice    58.  7.100 / 58.  7.100
  libavfilter     7. 48.100 /  7. 48.100
  libswscale      5.  4.100 /  5.  4.100
  libswresample   3.  4.100 /  3.  4.100
Input #0, image2, from 'debug/8.jpg':
  Duration: 00:00:00.04, start: 0.000000, bitrate: 39119 kb/s
    Stream #0:0: Video: mjpeg (Lossless), gray(bt470bg/unknown/unknown),
640x480 [SAR 96:96 DAR 4:3], lossless, 25 tbr, 25 tbn, 25 tbc
Stream mapping:
  Stream #0:0 -> #0:0 (mjpeg (native) -> mjpeg (native))
Press [q] to stop, [?] for help
[swscaler @ 0x560c9b036400] deprecated pixel format used, make sure you
did set range correctly
 968130 decicycles in fillborders=25:25:25:25:mirror 1p-8bit-0x0,      
1 runs,      0 skips
Output #0, image2, to 'debug/ZZ_8_mirror-25-25-25-25.jpg':
  Metadata:
    encoder         : Lavf58.26.101
    Stream #0:0: Video: mjpeg, yuvj444p(pc), 640x480 [SAR 1:1 DAR 4:3],
q=2-31, 200 kb/s, 25 fps, 25 tbn, 25 tbc
    Metadata:
      encoder         : Lavc58.47.105 mjpeg
    Side data:
      cpb: bitrate max/min/avg: 0/0/200000 buffer size: 0 vbv_delay: -1
frame=    1 fps=0.0 q=6.0 Lsize=N/A time=00:00:00.04 bitrate=N/A
speed=1.69x   
video:34kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB
muxing overhead: unknown
Carl Eugen Hoyos March 26, 2019, 2:47 p.m. UTC | #6
2019-03-26 15:42 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:

> Do you mean the following option? Unfortunately I still see only 1 run.
>
> I know, that it works with "-vf -loop=loop=1024:size=1:start=0", but I
> ask, because I want to understand the purpose of the shorter option
> "-loop number".

> ./ffmpeg-p7b -y -i debug/8.jpg -loop 1024 -vf

loop is an input option, consider using rawvideo (possibly /dev/zero)
as input to increase performance (and reduce measurement error).

> fillborders=25:25:25:25:mirror debug/ZZ_8_mirror-25-25-25-25.jpg

and output to "-f null -".

Carl Eugen
Nicolas George March 26, 2019, 2:48 p.m. UTC | #7
Ulf Zibis (12019-03-26):
> Do you mean the following option? Unfortunately I still see only 1 run.
> 
> I know, that it works with "-vf -loop=loop=1024:size=1:start=0", but I
> ask, because I want to understand the purpose of the shorter option
> "-loop number".
> 
> ./ffmpeg-p7b -y -i debug/8.jpg -loop 1024 -vf
> fillborders=25:25:25:25:mirror debug/ZZ_8_mirror-25-25-25-25.jpg

Are you trying to benchmark the JPEG encoder? If not, do not use the
JPEG encoder, use no encoder at all.

Are you trying to benchmark the image2 muxer? If not, do not use the
image2 muxer, use no muxer at all.

Are you trying to benchmark the JPEG decoder? If not, do not use the
JPEG decoder, use the "color" filter source, or, if the test requires
non-trivial content to be relevant, prepare a rawvideo input.

Most of all: use common sense!

Regards,
Ulf Zibis March 26, 2019, 2:48 p.m. UTC | #8
Am 26.03.19 um 15:42 schrieb Ulf Zibis:
> Hi again,
>
> Am 19.03.19 um 17:31 schrieb Carl Eugen Hoyos:
>>>  122670 decicycles in fillborders=0:0:5:5:mirror 3p-8bit-1x1,
>>> 1 runs,      0 skips
>> One run is not good.
>> Either use the loop option to filter the same frame again and
>> again or feed a video to ffmpeg.
> Do you mean the following option? Unfortunately I still see only 1 run.
>
> I know, that it works with "-vf -loop=loop=1024:size=1:start=0", but I
> ask, because I want to understand the purpose of the shorter option
> "-loop number".

Also  "-stream_loop 1024" doesn't work as I would expect.

-Ulf
Ulf Zibis March 26, 2019, 2:56 p.m. UTC | #9
Am 26.03.19 um 15:48 schrieb Nicolas George:
> Ulf Zibis (12019-03-26):
>> Do you mean the following option? Unfortunately I still see only 1 run.
>>
>> I know, that it works with "-vf -loop=loop=1024:size=1:start=0", but I
>> ask, because I want to understand the purpose of the shorter option
>> "-loop number".
>>
>> ./ffmpeg-p7b -y -i debug/8.jpg -loop 1024 -vf
>> fillborders=25:25:25:25:mirror debug/ZZ_8_mirror-25-25-25-25.jpg
> Are you trying to benchmark the JPEG encoder? If not, do not use the
> JPEG encoder, use no encoder at all.
>
> Are you trying to benchmark the image2 muxer? If not, do not use the
> image2 muxer, use no muxer at all.
>
> Are you trying to benchmark the JPEG decoder? If not, do not use the
> JPEG decoder, use the "color" filter source, or, if the test requires
> non-trivial content to be relevant, prepare a rawvideo input.

Thanks for your hints.

I'm trying to benchmark -vf fillborders (added the timer code in
vf_fillborders.c), so Carl Eugen's suggestion to use /dev/zero as input
would not make sense. I'll try with "-f null -".

-Ulf
Ulf Zibis March 26, 2019, 2:59 p.m. UTC | #10
Am 26.03.19 um 15:56 schrieb Ulf Zibis:
> I'm trying to benchmark -vf fillborders (added the timer code in
> vf_fillborders.c), so Carl Eugen's suggestion to use /dev/zero as input
> would not make sense. I'll try with "-f null -".

Again only 1 runs (also with "-stream_loop 1024").

-Ulf
Nicolas George March 26, 2019, 3 p.m. UTC | #11
Ulf Zibis (12019-03-26):
> I'm trying to benchmark -vf fillborders (added the timer code in
> vf_fillborders.c), so Carl Eugen's suggestion to use /dev/zero as input
> would not make sense. I'll try with "-f null -".

Using the "color" filter source may be a little more efficient, and is
much more convenient.

Regards,
Nicolas George March 26, 2019, 3:01 p.m. UTC | #12
Ulf Zibis (12019-03-26):
> Again only 1 runs (also with "-stream_loop 1024").

You are obviously doing something wrong.

Regards,
Carl Eugen Hoyos March 26, 2019, 3:10 p.m. UTC | #13
2019-03-26 15:56 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:

> I'm trying to benchmark -vf fillborders (added the timer
> code in vf_fillborders.c), so Carl Eugen's suggestion
> to use /dev/zero as input would not make sense.

Please elaborate.

Carl Eugen
Ulf Zibis March 26, 2019, 3:23 p.m. UTC | #14
Am 26.03.19 um 16:00 schrieb Nicolas George:
> Using the "color" filter source may be a little more efficient, and is
> much more convenient.
With
ffplay -f lavfi color=green
I only see a monotone picture. This is not appropriate to test the
fillborders filter with mode=mirror.

Also yuvtestsrc is not really helpful for that.

-Ulf
Nicolas George March 26, 2019, 3:26 p.m. UTC | #15
Ulf Zibis (12019-03-26):
> With
> ffplay -f lavfi color=green
> I only see a monotone picture. This is not apropriate to test the
> fillborders filter with mode=mirror.

Ok. Then it is not suitable. And neither would be /dev/zero.

> Also yuvtestsrc is not really helpfull on that.

Try testsrc2.

And if it is satisfactory, prepare a rawvideo clip with a lightweight
muxer.

Regards,
Ulf Zibis March 26, 2019, 3:28 p.m. UTC | #16
Am 26.03.19 um 16:10 schrieb Carl Eugen Hoyos:
> 2019-03-26 15:56 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>
>> I'm trying to benchmark -vf fillborders (added the timer
>> code in vf_fillborders.c), so Carl Eugen's suggestion
>> to use /dev/zero as input would not make sense.
> Please elaborate.

It seems I'm doing something wrong:

ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024 -i
/dev/zero -vf fillborders=25:25:25:25:mirror -f null -
ffmpeg version N-93458-g18429ce896 Copyright (c) 2000-2019 the FFmpeg
developers
  built with gcc 7 (Ubuntu 7.3.0-27ubuntu1~18.04)
  configuration:
  libavutil      56. 26.100 / 56. 26.100
  libavcodec     58. 47.105 / 58. 47.105
  libavformat    58. 26.101 / 58. 26.101
  libavdevice    58.  7.100 / 58.  7.100
  libavfilter     7. 48.100 /  7. 48.100
  libswscale      5.  4.100 /  5.  4.100
  libswresample   3.  4.100 /  3.  4.100
/dev/zero: Invalid data found when processing input
ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -i /dev/zero -loop 1024
-vf fillborders=25:25:25:25:mirror -f null -
ffmpeg version N-93458-g18429ce896 Copyright (c) 2000-2019 the FFmpeg
developers
  built with gcc 7 (Ubuntu 7.3.0-27ubuntu1~18.04)
  configuration:
  libavutil      56. 26.100 / 56. 26.100
  libavcodec     58. 47.105 / 58. 47.105
  libavformat    58. 26.101 / 58. 26.101
  libavdevice    58.  7.100 / 58.  7.100
  libavfilter     7. 48.100 /  7. 48.100
  libswscale      5.  4.100 /  5.  4.100
  libswresample   3.  4.100 /  3.  4.100
/dev/zero: Invalid data found when processing input

-Ulf
Nicolas George March 26, 2019, 3:31 p.m. UTC | #17
Ulf Zibis (12019-03-26):
> It seems I'm doing something wrong:
> 
> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024 -i
> /dev/zero -vf fillborders=25:25:25:25:mirror -f null -

Obviously. Please stop putting options randomly together and wasting
everybody's time when they do not work. Instead take the necessary time
to learn how ffmpeg works.

Regards,
Carl Eugen Hoyos March 26, 2019, 3:32 p.m. UTC | #18
2019-03-26 16:28 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>
> Am 26.03.19 um 16:10 schrieb Carl Eugen Hoyos:
>> 2019-03-26 15:56 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>
>>> I'm trying to benchmark -vf fillborders (added the timer
>>> code in vf_fillborders.c), so Carl Eugen's suggestion
>>> to use /dev/zero as input would not make sense.
>> Please elaborate.
>
> It seems I'm doing something wrong:
>
> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024
> -i /dev/zero -vf fillborders=25:25:25:25:mirror -f null -

$ ffmpeg -f rawvideo -s hd1080 -i /dev/zero -vf ... -t 1000 -f null -

It may be that the performance of the filter cannot be
tested like this, I don't know.

Carl Eugen
Carl Eugen Hoyos March 26, 2019, 3:34 p.m. UTC | #19
2019-03-26 16:23 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>
> Am 26.03.19 um 16:00 schrieb Nicolas George:
>> Using the "color" filter source may be a little more
>> efficient, and is much more convenient.
> With
> ffplay -f lavfi color=green
> I only see a monotone picture. This is not apropriate
> to test the fillborders filter with mode=mirror.

Why not?

Carl Eugen
Ulf Zibis March 26, 2019, 4:01 p.m. UTC | #20
Am 26.03.19 um 16:26 schrieb Nicolas George:
>
> Try testsrc2.
Bad news:

ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b testsrc2 -loop 1024 -vf
fillborders=25:25:25:25:mirror -f null -
ffmpeg version N-93458-g18429ce896 Copyright (c) 2000-2019 the FFmpeg
developers
  built with gcc 7 (Ubuntu 7.3.0-27ubuntu1~18.04)
  configuration:
  libavutil      56. 26.100 / 56. 26.100
  libavcodec     58. 47.105 / 58. 47.105
  libavformat    58. 26.101 / 58. 26.101
  libavdevice    58.  7.100 / 58.  7.100
  libavfilter     7. 48.100 /  7. 48.100
  libswscale      5.  4.100 /  5.  4.100
  libswresample   3.  4.100 /  3.  4.100
[NULL @ 0x5636e78ba900] Unable to find a suitable output format for
'testsrc2'
testsrc2: Invalid argument
ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -f lavfi testsrc2 -loop
1024 -vf fillborders=25:25:25:25:mirror -f null -
ffmpeg version N-93458-g18429ce896 Copyright (c) 2000-2019 the FFmpeg
developers
  built with gcc 7 (Ubuntu 7.3.0-27ubuntu1~18.04)
  configuration:
  libavutil      56. 26.100 / 56. 26.100
  libavcodec     58. 47.105 / 58. 47.105
  libavformat    58. 26.101 / 58. 26.101
  libavdevice    58.  7.100 / 58.  7.100
  libavfilter     7. 48.100 /  7. 48.100
  libswscale      5.  4.100 /  5.  4.100
  libswresample   3.  4.100 /  3.  4.100
[NULL @ 0x564b7e1fb940] Requested output format 'lavfi' is not a
suitable output format
testsrc2: Invalid argument

Anyway this test video is not really appropriate for my tests with
fillborders filter:
1.) The edges are mostly monotone, so I would hardly see an effect after
mirroring the borders
2.) I still want to see the performance on single pictures.

-Ulf
Ulf Zibis March 26, 2019, 4:09 p.m. UTC | #21
Am 26.03.19 um 16:34 schrieb Carl Eugen Hoyos:
> 2019-03-26 16:23 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>> Am 26.03.19 um 16:00 schrieb Nicolas George:
>>> Using the "color" filter source may be a little more
>>> efficient, and is much more convenient.
>> With
>> ffplay -f lavfi color=green
>> I only see a monotone picture. This is not apropriate
>> to test the fillborders filter with mode=mirror.
> Why not?
Well, it may be good for the performance test, but can't test the
algorithmic correctness of the tweaked vf_fillborders.c code.
Additionally I want to test on different ...
- number of planes
- color model
- bit depth

-Ulf
Carl Eugen Hoyos March 26, 2019, 4:12 p.m. UTC | #22
2019-03-26 17:09 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>
> Am 26.03.19 um 16:34 schrieb Carl Eugen Hoyos:
>> 2019-03-26 16:23 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>> Am 26.03.19 um 16:00 schrieb Nicolas George:
>>>> Using the "color" filter source may be a little more
>>>> efficient, and is much more convenient.
>>> With
>>> ffplay -f lavfi color=green
>>> I only see a monotone picture. This is not apropriate
>>> to test the fillborders filter with mode=mirror.
>> Why not?
> Well, it may be good for the performance test

I was under the impression that we exchanged all
these emails today only because you still hadn't
found a way to measure the performance of your
patch.
I hoped you had already tested the functional
correctness.

Carl Eugen
Ulf Zibis March 26, 2019, 4:17 p.m. UTC | #23
Am 26.03.19 um 16:32 schrieb Carl Eugen Hoyos:
> 2019-03-26 16:28 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>> Am 26.03.19 um 16:10 schrieb Carl Eugen Hoyos:
>>> 2019-03-26 15:56 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>>
>>>> I'm trying to benchmark -vf fillborders (added the timer
>>>> code in vf_fillborders.c), so Carl Eugen's suggestion
>>>> to use /dev/zero as input would not make sense.
>>> Please elaborate.
>> It seems I'm doing something wrong:
>>
>> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024
>> -i /dev/zero -vf fillborders=25:25:25:25:mirror -f null -
> $ ffmpeg -f rawvideo -s hd1080 -i /dev/zero -vf ... -t 1000 -f null -
>
> It may be that the performance of the filter cannot be
> tested like this, I don't know.

Thanks for your help!

I suspect you are right that this is not suitable for a performance test.
... and for sure not for algorithmic tests.

Unfortunately my initial question is still open:
... but I ask, because I want to understand the purpose of the shorter
options "-loop number" and "-stream_loop number" (or how to apply them
correctly in the command line to get the wanted effect).

-Ulf
Carl Eugen Hoyos March 26, 2019, 4:20 p.m. UTC | #24
2019-03-26 17:17 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>
> Am 26.03.19 um 16:32 schrieb Carl Eugen Hoyos:
>> 2019-03-26 16:28 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>> Am 26.03.19 um 16:10 schrieb Carl Eugen Hoyos:
>>>> 2019-03-26 15:56 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>>>
>>>>> I'm trying to benchmark -vf fillborders (added the timer
>>>>> code in vf_fillborders.c), so Carl Eugen's suggestion
>>>>> to use /dev/zero as input would not make sense.
>>>> Please elaborate.
>>> It seems I'm doing something wrong:
>>>
>>> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024
>>> -i /dev/zero -vf fillborders=25:25:25:25:mirror -f null -
>> $ ffmpeg -f rawvideo -s hd1080 -i /dev/zero -vf ... -t 1000 -f null -
>>
>> It may be that the performance of the filter cannot be
>> tested like this, I don't know.

> I suspect, you are right on not suitable for performance test.

(I did not claim that, on the contrary.)
Why not?

Carl Eugen
Ulf Zibis March 26, 2019, 4:36 p.m. UTC | #25
Am 26.03.19 um 17:20 schrieb Carl Eugen Hoyos:
> 2019-03-26 17:17 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>> Am 26.03.19 um 16:32 schrieb Carl Eugen Hoyos:
>>> 2019-03-26 16:28 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>>> Am 26.03.19 um 16:10 schrieb Carl Eugen Hoyos:
>>>>> 2019-03-26 15:56 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>>>>
>>>>>> I'm trying to benchmark -vf fillborders (added the timer
>>>>>> code in vf_fillborders.c), so Carl Eugen's suggestion
>>>>>> to use /dev/zero as input would not make sense.
>>>>> Please elaborate.
>>>> It seems I'm doing something wrong:
>>>>
>>>> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024
>>>> -i /dev/zero -vf fillborders=25:25:25:25:mirror -f null -
>>> $ ffmpeg -f rawvideo -s hd1080 -i /dev/zero -vf ... -t 1000 -f null -
>>>
>>> It may be that the performance of the filter cannot be
>>> tested like this, I don't know.
>> I suspect, you are right on not suitable for performance test.
> (I did not claim that, on the contrary.)
> Why not?
1.) There may be a shortcut in CPU architecture for copying nulls in
series (fillborders.c essentially does that) and more important ...
2.) Additionally I want to test on different ...
- number of planes
- color model /resolution
- bit depth

-Ulf
Carl Eugen Hoyos March 26, 2019, 4:39 p.m. UTC | #26
2019-03-26 17:36 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>
> Am 26.03.19 um 17:20 schrieb Carl Eugen Hoyos:
>> 2019-03-26 17:17 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>> Am 26.03.19 um 16:32 schrieb Carl Eugen Hoyos:
>>>> 2019-03-26 16:28 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>>>> Am 26.03.19 um 16:10 schrieb Carl Eugen Hoyos:
>>>>>> 2019-03-26 15:56 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>>>>>>
>>>>>>> I'm trying to benchmark -vf fillborders (added the timer
>>>>>>> code in vf_fillborders.c), so Carl Eugen's suggestion
>>>>>>> to use /dev/zero as input would not make sense.
>>>>>> Please elaborate.
>>>>> It seems I'm doing something wrong:
>>>>>
>>>>> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024
>>>>> -i /dev/zero -vf fillborders=25:25:25:25:mirror -f null -
>>>> $ ffmpeg -f rawvideo -s hd1080 -i /dev/zero -vf ... -t 1000 -f null -
>>>>
>>>> It may be that the performance of the filter cannot be
>>>> tested like this, I don't know.
>>> I suspect, you are right on not suitable for performance test.
>> (I did not claim that, on the contrary.)
>> Why not?

> 1.) There may be a shortcut in CPU architecture for copying nulls in
> series (fillborders.c essentially does that) and more important ...

I am curious:
Which architecture are you thinking about that interprets
FFmpeg's inner structure?

> 2.) Additionally I want to test on different ...
> - number of planes
> - color model /resolution
> - bit depth

Use the input option -pix_fmt

Carl Eugen
Ulf Zibis March 26, 2019, 4:59 p.m. UTC | #27
Am 26.03.19 um 17:39 schrieb Carl Eugen Hoyos:
>
>> 1.) There may be a shortcut in CPU architecture for copying nulls in
>> series (fillborders.c essentially does that) and more important ...
> I am curious:
> Which architecture are you thinking about that interprets
> FFmpeg's inner structure?
I was inspired by your suspicion. ;-) From Java code I know that such
things happen because of the JIT ("just in time" compiler) optimization; I
don't know if modern C compilers produce similar effects.

>> 2.) Additionally I want to test on different ...
>> - number of planes
>> - color model /resolution
>> - bit depth
> Use the input option -pix_fmt

Ok, I'll look on that.

And I'm still curious to read something on my initial question
(following your suggestion from 19.03.19, 17:31 CET to use "-loop"):
... I ask, because I want to understand the purpose of the shorter
options "-loop number" and "-stream_loop number" (or how to apply them
correctly in the command line to get the wanted effect on single picture
input).

-Ulf
Paul B Mahol March 26, 2019, 5:19 p.m. UTC | #28
On 3/26/19, Ulf Zibis <Ulf.Zibis@cosoco.de> wrote:
>
> Am 26.03.19 um 17:39 schrieb Carl Eugen Hoyos:
>>
>>> 1.) There may be a shortcut in CPU architecture for copying nulls in
>>> series (fillborders.c essentially does that) and more important ...
>> I am curious:
>> Which architecture are you thinking about that interprets
>> FFmpeg's inner structure?
> I was inspired of your suspicion. ;-) From Java code I know, that such
> things happen as cause of the JIT "just in time compiler" optimization,
> don't know, if modern C compilers assemble similar effects.
>
>>> 2.) Additionally I want to test on different ...
>>> - number of planes
>>> - color model /resolution
>>> - bit depth
>> Use the input option -pix_fmt
>
> Ok, I'll look on that.
>
> And I'm still curious to read something on my initial question
> (following your suggestion from 19.03.19, 17:31 CET to use "-loop"):
> ... I ask, because I want to understand the purpose of the shorter
> options "-loop number" and "-stream_loop number" (or how to apply them
> correctly in the command line to get the wanted effect on single picture
> input).

If you want to work on ffmpeg, you certainly need to learn how to use it.
You showed very little skill.
Ulf Zibis March 26, 2019, 10:33 p.m. UTC | #29
Am 26.03.19 um 16:32 schrieb Carl Eugen Hoyos:
>>> Please elaborate.
>> It seems I'm doing something wrong:
>>
>> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024
>> -i /dev/zero -vf fillborders=25:25:25:25:mirror -f null -
> $ ffmpeg -f rawvideo -s hd1080 -i /dev/zero -vf ... -t 1000 -f null -
Thanks. With "-t 1000" it loops endlessly, but it works fine with:

$ ffmpeg -f rawvideo -s 300x200 -i /dev/zero -vf ... -frames 1024 -f null -

-Ulf
Carl Eugen Hoyos March 26, 2019, 10:37 p.m. UTC | #30
2019-03-26 23:33 GMT+01:00, Ulf Zibis <Ulf.Zibis@cosoco.de>:
>
> Am 26.03.19 um 16:32 schrieb Carl Eugen Hoyos:
>>>> Please elaborate.
>>> It seems I'm doing something wrong:
>>>
>>> ich@T500:~/Projects/ffmpeg/dev$ ./ffmpeg-p7b -y -stream_loop 1024
>>> -i /dev/zero -vf fillborders=25:25:25:25:mirror -f null -
>> $ ffmpeg -f rawvideo -s hd1080 -i /dev/zero -vf ... -t 1000 -f null -
> Thanks. With "-t 1000" it loops endless

Unlikely.

Carl Eugen
Ulf Zibis March 27, 2019, 12:12 p.m. UTC | #31
Am 26.03.19 um 17:12 schrieb Carl Eugen Hoyos:
> I was under the impression that we exchanged all
> these emails today only because you still hadn't
> found a way to measure the performance of your
> patch.

As I had written, I found a way with "-vf
loop=loop=1024:size=1:start=0", but I was curious how I could use the
shorter options -loop or -stream_loop from your suggestion of 19.03.19,
17:31 CET.
This does not mean that I dislike your new suggestion with "-f rawvideo
....".

> I hoped you had already tested the functional correctness.

Until this state of changes yes. But it is more convenient in my IDE
configuration to have only 1 script for both purposes.

Thanks for all your help

-Ulf
diff mbox

Patch

From 8201c51ced164efdcee292f71cc94d0a7ab38b18 Mon Sep 17 00:00:00 2001
From: Ulf Zibis <Ulf.Zibis@CoSoCo.de>
Date: 19.03.2019, 02:52:44

avfilter/fillborders: avoid needless calculations for performance; added ffmpeg-p2 and rgba64le-lzw.tif to benchmark

diff --git a/debug/fillborders.sh b/debug/fillborders.sh
index 671032b..1bd7e12 100755
--- a/debug/fillborders.sh
+++ b/debug/fillborders.sh
@@ -1,8 +1,9 @@ 
 #!/bin/bash
 i=0
-test[i++]="3-plane 8-bit YUV-colour:    CYD_1005.jpg"
-test[i++]="4-plane 8-bit RGB-colour:    8.jpg"
-test[i++]="4-plane 16-bit RGB-colour:   16.jpg"
+test[i++]="3-plane 8-bit  YUV-420:   CYD_1005.jpg"
+#test[i++]="1-plane 8-bit  Y-400:     8.jpg"
+#test[i++]="1-plane 16-bit Y-400:     16.jpg"
+#test[i++]="4-plane 16-bit RGB-444:   rgba64le-lzw.tif"
 
 for ((i=0;i<${#test[@]};i++))
 do
@@ -13,10 +14,10 @@ 
     for borders in "0:0:5:5" "5:5:0:0" "5:5:5:5"
     do
         output="ZZ_${input%.*}_${mode%:*}-${borders//:/-}.${input##*.}"
-        for patch in "ffmpeg     " "./ffmpeg-p1"
+        for patch in "./ffmpeg-p1" "./ffmpeg-p2"
         do
-            echo "${patch} ${input} --> ${output}"
-            for ((benchmark=0;benchmark<3;benchmark++))
+            echo "${patch} : ${input} --> ${output}"
+            for ((benchmark=0;benchmark<6;benchmark++))
             do
                  ${patch} -y -v error -i debug/${input} -vf fillborders=${borders}:${mode} debug/${output}
             done
diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c
index e06b8e8..15cf5d9 100644
--- a/libavfilter/vf_fillborders.c
+++ b/libavfilter/vf_fillborders.c
@@ -93,14 +93,16 @@ 
         int linesize = frame->linesize[p];
 
         /* fill left and right borders from top to bottom border */
-        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
-            memset(data + y * linesize,
-                    *(data + y * linesize + s->borders[p].left),
-                    s->borders[p].left);
-            memset(data + y * linesize + s->planewidth[p] - s->borders[p].right,
-                    *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1),
-                    s->borders[p].right);
-        }
+        if (s->borders[p].left != 0 ||
+                s->borders[p].right != s->planewidth[p]) // in case skip for performance
+            for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+                memset(data + y * linesize,
+                        *(data + y * linesize + s->borders[p].left),
+                        s->borders[p].left);
+                memset(data + y * linesize + s->planewidth[p] - s->borders[p].right,
+                        *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1),
+                        s->borders[p].right);
+            }
 
         /* fill top and bottom borders */
         for (y = 0; y < s->borders[p].top; y++) {
@@ -124,15 +126,17 @@ 
         int linesize = frame->linesize[p] / sizeof(uint16_t);
 
         /* fill left and right borders from top to bottom border */
-        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
-            for (x = 0; x < s->borders[p].left; x++) {
-                data[y * linesize + x] = *(data + y * linesize + s->borders[p].left);
+        if (s->borders[p].left != 0 ||
+                s->borders[p].right != s->planewidth[p]) // in case skip for performance
+            for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+                for (x = 0; x < s->borders[p].left; x++) {
+                    data[y * linesize + x] = *(data + y * linesize + s->borders[p].left);
+                }
+                for (x = 0; x < s->borders[p].right; x++) {
+                    data[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
+                            *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1);
+                }
             }
-            for (x = 0; x < s->borders[p].right; x++) {
-                data[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
-                    *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1);
-            }
-        }
 
         /* fill top and bottom borders */
         for (y = 0; y < s->borders[p].top; y++) {
@@ -156,15 +160,17 @@ 
         int linesize = frame->linesize[p];
 
         /* fill left and right borders from top to bottom border */
-        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
-            for (x = 0; x < s->borders[p].left; x++) {
-                data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x];
+        if (s->borders[p].left != 0 ||
+                s->borders[p].right != s->planewidth[p]) // in case skip for performance
+            for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+                for (x = 0; x < s->borders[p].left; x++) {
+                    data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x];
+                }
+                for (x = 0; x < s->borders[p].right; x++) {
+                    data[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
+                            data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x];
+                }
             }
-            for (x = 0; x < s->borders[p].right; x++) {
-                data[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
-                    data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x];
-            }
-        }
 
         /* fill top and bottom borders */
         for (y = 0; y < s->borders[p].top; y++) {
@@ -189,16 +195,18 @@ 
         int linesize = frame->linesize[p] / sizeof(uint16_t);
 
         /* fill left and right borders from top to bottom border */
-        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
-            for (x = 0; x < s->borders[p].left; x++) {
-                data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x];
-            }
+        if (s->borders[p].left != 0 ||
+                s->borders[p].right != s->planewidth[p]) // in case skip for performance
+            for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+                for (x = 0; x < s->borders[p].left; x++) {
+                    data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x];
+                }
 
-            for (x = 0; x < s->borders[p].right; x++) {
-                data[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
-                    data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x];
+                for (x = 0; x < s->borders[p].right; x++) {
+                    data[y * linesize + s->planewidth[p] - s->borders[p].right + x] =
+                            data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x];
+                }
             }
-        }
 
         /* fill top and bottom borders */
         for (y = 0; y < s->borders[p].top; y++) {
@@ -224,11 +232,13 @@ 
         int linesize = frame->linesize[p];
 
         /* fill left and right borders from top to bottom border */
-        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
-            memset(data + y * linesize, fill, s->borders[p].left);
-            memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, fill,
-                    s->borders[p].right);
-        }
+        if (s->borders[p].left != 0 ||
+                s->borders[p].right != s->planewidth[p]) // in case skip for performance
+            for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+                memset(data + y * linesize, fill, s->borders[p].left);
+                memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, fill,
+                        s->borders[p].right);
+            }
 
         /* fill top and bottom borders */
         for (y = 0; y < s->borders[p].top; y++) {
@@ -250,14 +260,16 @@ 
         int linesize = frame->linesize[p] / sizeof(uint16_t);
 
         /* fill left and right borders from top to bottom border */
-        for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
-            for (x = 0; x < s->borders[p].left; x++) {
-                data[y * linesize + x] = fill;
+        if (s->borders[p].left != 0 ||
+                s->borders[p].right != s->planewidth[p]) // in case skip for performance
+            for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) {
+                for (x = 0; x < s->borders[p].left; x++) {
+                    data[y * linesize + x] = fill;
+                }
+                for (x = 0; x < s->borders[p].right; x++) {
+                    data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill;
+                }
             }
-            for (x = 0; x < s->borders[p].right; x++) {
-                data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill;
-            }
-        }
 
         /* fill top and bottom borders */
         for (y = 0; y < s->borders[p].top; y++) {
@@ -303,6 +315,20 @@ 
     FillBordersContext *s = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
+    if (inlink->w < s->left + s->right ||
+            inlink->w <= s->left ||
+            inlink->w <= s->right ||
+            inlink->h < s->top + s->bottom ||
+            inlink->h <= s->top ||
+            inlink->h <= s->bottom ||
+            inlink->w < s->left * 2 ||
+            inlink->w < s->right * 2 ||
+            inlink->h < s->top * 2 ||
+            inlink->h < s->bottom * 2) {
+        av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n");
+        return AVERROR(EINVAL);
+    }
+
     s->nb_planes = desc->nb_components;
     s->depth = desc->comp[0].depth;
 
@@ -326,40 +352,23 @@ 
     s->borders[2].top    = s->top >> desc->log2_chroma_h;
     s->borders[2].bottom = s->bottom >> desc->log2_chroma_h;
 
-    if (inlink->w < s->left + s->right ||
-            inlink->w <= s->left ||
-            inlink->w <= s->right ||
-            inlink->h < s->top + s->bottom ||
-            inlink->h <= s->top ||
-            inlink->h <= s->bottom ||
-            inlink->w < s->left * 2 ||
-            inlink->w < s->right * 2 ||
-            inlink->h < s->top * 2 ||
-            inlink->h < s->bottom * 2) {
-        av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n");
-        return AVERROR(EINVAL);
-    }
-
     switch (s->mode) {
         case FM_SMEAR:  s->fillborders = s->depth <= 8 ? smear_borders8  : smear_borders16;  break;
         case FM_MIRROR: s->fillborders = s->depth <= 8 ? mirror_borders8 : mirror_borders16; break;
-        case FM_FIXED:  s->fillborders = s->depth <= 8 ? fixed_borders8  : fixed_borders16;  break;
-    }
-
-    s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]);
-    s->yuv_color[U] = RGB_TO_U_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0);
-    s->yuv_color[V] = RGB_TO_V_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0);
-    s->yuv_color[A] = s->rgba_color[A];
-
-    if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
-        uint8_t rgba_map[4];
-        int i;
-
-        ff_fill_rgba_map(rgba_map, inlink->format);
-        for (i = 0; i < sizeof(rgba_map); i++)
-            s->fill[rgba_map[i]] = s->rgba_color[i];
-    } else {
-        memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color));
+        case FM_FIXED:  s->fillborders = s->depth <= 8 ? fixed_borders8  : fixed_borders16;
+            if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
+                uint8_t rgba_map[4];
+                int i;
+                ff_fill_rgba_map(rgba_map, inlink->format);
+                for (i = 0; i < sizeof(rgba_map); i++)
+                    s->fill[rgba_map[i]] = s->rgba_color[i];
+            } else {
+                s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]);
+                s->yuv_color[U] = RGB_TO_U_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0);
+                s->yuv_color[V] = RGB_TO_V_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0);
+                s->yuv_color[A] = s->rgba_color[A];
+                memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color));
+            } break;
     }
 
     return 0;