diff mbox series

[FFmpeg-devel,v2,7/7] avcodec/mpegvideoencdsp: speed up draw_edges_8_c by inlining it for all used edge widths

Message ID 20240821145555.235323-8-ramiro.polla@gmail.com
State New
Headers show
Series avcodec/mpegvideoencdsp improvements | expand

Commit Message

Ramiro Polla Aug. 21, 2024, 2:55 p.m. UTC
This commit also restricts the edge width w to 4, 8, or 16.

Intel(R) Core(TM) i5-5300U CPU @ 2.30GHz:
                                    before    after
draw_edges_8_1724_4_c:             46796.5   7141.7  ( 6.55x)
draw_edges_8_1724_8_c:             43584.5   7216.5  ( 6.04x)
draw_edges_8_1724_16_c:            47007.2  10080.5  ( 4.66x)
draw_edges_128_407_4_c:            11199.0   4185.0  ( 2.68x)
draw_edges_128_407_8_c:            10660.2   4418.0  ( 2.41x)
draw_edges_128_407_16_c:           11800.2   4634.5  ( 2.55x)
draw_edges_1080_31_4_c:             1356.5    634.7  ( 2.14x)
draw_edges_1080_31_8_c:             1972.0   1430.2  ( 1.38x)
draw_edges_1080_31_16_c:            4621.0   4009.7  ( 1.15x)
draw_edges_1920_4_4_c:               834.5    795.2  ( 1.05x)
draw_edges_1920_4_4_negstride_c:     821.7    802.0  ( 1.02x)
draw_edges_1920_4_8_c:              2782.2   2650.7  ( 1.05x)
draw_edges_1920_4_8_negstride_c:    2724.7   2670.0  ( 1.02x)
draw_edges_1920_4_16_c:             6437.5   6327.7  ( 1.02x)
draw_edges_1920_4_16_negstride_c:   6395.2   6349.5  ( 1.01x)

A55:
                                    before    after
draw_edges_8_1724_4_c:             52540.4  19739.2  ( 2.66x)
draw_edges_8_1724_8_c:             45386.9  19847.4  ( 2.29x)
draw_edges_8_1724_16_c:            51995.4  23284.7  ( 2.23x)
draw_edges_128_407_4_c:            13401.1   6988.2  ( 1.92x)
draw_edges_128_407_8_c:            12218.4   7527.9  ( 1.62x)
draw_edges_128_407_16_c:           13695.9   8207.2  ( 1.67x)
draw_edges_1080_31_4_c:             3702.9   3110.4  ( 1.19x)
draw_edges_1080_31_8_c:             6015.6   5643.2  ( 1.07x)
draw_edges_1080_31_16_c:           12281.9  11901.4  ( 1.03x)
draw_edges_1920_4_4_c:              3957.9   3970.2  ( 1.00x)
draw_edges_1920_4_4_negstride_c:    3964.1   3825.2  ( 1.04x)
draw_edges_1920_4_8_c:              7757.9   7676.4  ( 1.01x)
draw_edges_1920_4_8_negstride_c:    7923.6   7812.4  ( 1.01x)
draw_edges_1920_4_16_c:            14791.6  15143.9  ( 0.98x)
draw_edges_1920_4_16_negstride_c:  14788.6  15163.4  ( 0.98x)

A76:
                                    before   after
draw_edges_8_1724_4_c:             39786.0  4968.5  ( 8.01x)
draw_edges_8_1724_8_c:             32971.5  5069.5  ( 6.50x)
draw_edges_8_1724_16_c:            40056.0  6017.2  ( 6.66x)
draw_edges_128_407_4_c:             9517.2  1210.5  ( 7.86x)
draw_edges_128_407_8_c:             8035.7  1346.2  ( 5.97x)
draw_edges_128_407_16_c:            9946.5  1648.2  ( 6.03x)
draw_edges_1080_31_4_c:             1308.0   660.7  ( 1.98x)
draw_edges_1080_31_8_c:             1785.5  1270.7  ( 1.41x)
draw_edges_1080_31_16_c:            3266.7  2591.5  ( 1.26x)
draw_edges_1920_4_4_c:              1151.0  1090.7  ( 1.06x)
draw_edges_1920_4_4_negstride_c:    1153.7  1096.5  ( 1.05x)
draw_edges_1920_4_8_c:              2220.7  2186.5  ( 1.02x)
draw_edges_1920_4_8_negstride_c:    2218.5  2193.5  ( 1.01x)
draw_edges_1920_4_16_c:             4324.2  4230.0  ( 1.02x)
draw_edges_1920_4_16_negstride_c:   4310.7  4233.0  ( 1.02x)
---
 libavcodec/mpegvideoencdsp.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

Comments

Ramiro Polla Aug. 22, 2024, 11:33 a.m. UTC | #1
On Wed, Aug 21, 2024 at 4:56 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> This commit also restricts w to 4, 8, or 16.
>
> Intel(R) Core(TM) i5-5300U CPU @ 2.30GHz:
>                                     before    after
> draw_edges_8_1724_4_c:             46796.5   7141.7  ( 6.55x)
> draw_edges_8_1724_8_c:             43584.5   7216.5  ( 6.04x)
> draw_edges_8_1724_16_c:            47007.2  10080.5  ( 4.66x)
> draw_edges_128_407_4_c:            11199.0   4185.0  ( 2.68x)
> draw_edges_128_407_8_c:            10660.2   4418.0  ( 2.41x)
> draw_edges_128_407_16_c:           11800.2   4634.5  ( 2.55x)
> draw_edges_1080_31_4_c:             1356.5    634.7  ( 2.14x)
> draw_edges_1080_31_8_c:             1972.0   1430.2  ( 1.38x)
> draw_edges_1080_31_16_c:            4621.0   4009.7  ( 1.15x)
> draw_edges_1920_4_4_c:               834.5    795.2  ( 1.05x)
> draw_edges_1920_4_4_negstride_c:     821.7    802.0  ( 1.02x)
> draw_edges_1920_4_8_c:              2782.2   2650.7  ( 1.05x)
> draw_edges_1920_4_8_negstride_c:    2724.7   2670.0  ( 1.02x)
> draw_edges_1920_4_16_c:             6437.5   6327.7  ( 1.02x)
> draw_edges_1920_4_16_negstride_c:   6395.2   6349.5  ( 1.01x)
>
> A55:
>                                     before    after
> draw_edges_8_1724_4_c:             52540.4  19739.2  ( 2.66x)
> draw_edges_8_1724_8_c:             45386.9  19847.4  ( 2.29x)
> draw_edges_8_1724_16_c:            51995.4  23284.7  ( 2.23x)
> draw_edges_128_407_4_c:            13401.1   6988.2  ( 1.92x)
> draw_edges_128_407_8_c:            12218.4   7527.9  ( 1.62x)
> draw_edges_128_407_16_c:           13695.9   8207.2  ( 1.67x)
> draw_edges_1080_31_4_c:             3702.9   3110.4  ( 1.19x)
> draw_edges_1080_31_8_c:             6015.6   5643.2  ( 1.07x)
> draw_edges_1080_31_16_c:           12281.9  11901.4  ( 1.03x)
> draw_edges_1920_4_4_c:              3957.9   3970.2  ( 1.00x)
> draw_edges_1920_4_4_negstride_c:    3964.1   3825.2  ( 1.04x)
> draw_edges_1920_4_8_c:              7757.9   7676.4  ( 1.01x)
> draw_edges_1920_4_8_negstride_c:    7923.6   7812.4  ( 1.01x)
> draw_edges_1920_4_16_c:            14791.6  15143.9  ( 0.98x)
> draw_edges_1920_4_16_negstride_c:  14788.6  15163.4  ( 0.98x)
>
> A76:
>                                     before   after
> draw_edges_8_1724_4_c:             39786.0  4968.5  ( 8.01x)
> draw_edges_8_1724_8_c:             32971.5  5069.5  ( 6.50x)
> draw_edges_8_1724_16_c:            40056.0  6017.2  ( 6.66x)
> draw_edges_128_407_4_c:             9517.2  1210.5  ( 7.86x)
> draw_edges_128_407_8_c:             8035.7  1346.2  ( 5.97x)
> draw_edges_128_407_16_c:            9946.5  1648.2  ( 6.03x)
> draw_edges_1080_31_4_c:             1308.0   660.7  ( 1.98x)
> draw_edges_1080_31_8_c:             1785.5  1270.7  ( 1.41x)
> draw_edges_1080_31_16_c:            3266.7  2591.5  ( 1.26x)
> draw_edges_1920_4_4_c:              1151.0  1090.7  ( 1.06x)
> draw_edges_1920_4_4_negstride_c:    1153.7  1096.5  ( 1.05x)
> draw_edges_1920_4_8_c:              2220.7  2186.5  ( 1.02x)
> draw_edges_1920_4_8_negstride_c:    2218.5  2193.5  ( 1.01x)
> draw_edges_1920_4_16_c:             4324.2  4230.0  ( 1.02x)
> draw_edges_1920_4_16_negstride_c:   4310.7  4233.0  ( 1.02x)
> ---

Ping on the draw_edges patches in this patchset.

I'll apply in a few days if there are no comments.

Ramiro Polla Aug. 26, 2024, 10:55 a.m. UTC | #2
On Thu, Aug 22, 2024 at 1:33 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Wed, Aug 21, 2024 at 4:56 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> > This commit also restricts w to 4, 8, or 16.
> >
> > Intel(R) Core(TM) i5-5300U CPU @ 2.30GHz:
> >                                     before    after
> > draw_edges_8_1724_4_c:             46796.5   7141.7  ( 6.55x)
> > draw_edges_8_1724_8_c:             43584.5   7216.5  ( 6.04x)
> > draw_edges_8_1724_16_c:            47007.2  10080.5  ( 4.66x)
> > draw_edges_128_407_4_c:            11199.0   4185.0  ( 2.68x)
> > draw_edges_128_407_8_c:            10660.2   4418.0  ( 2.41x)
> > draw_edges_128_407_16_c:           11800.2   4634.5  ( 2.55x)
> > draw_edges_1080_31_4_c:             1356.5    634.7  ( 2.14x)
> > draw_edges_1080_31_8_c:             1972.0   1430.2  ( 1.38x)
> > draw_edges_1080_31_16_c:            4621.0   4009.7  ( 1.15x)
> > draw_edges_1920_4_4_c:               834.5    795.2  ( 1.05x)
> > draw_edges_1920_4_4_negstride_c:     821.7    802.0  ( 1.02x)
> > draw_edges_1920_4_8_c:              2782.2   2650.7  ( 1.05x)
> > draw_edges_1920_4_8_negstride_c:    2724.7   2670.0  ( 1.02x)
> > draw_edges_1920_4_16_c:             6437.5   6327.7  ( 1.02x)
> > draw_edges_1920_4_16_negstride_c:   6395.2   6349.5  ( 1.01x)
> >
> > A55:
> >                                     before    after
> > draw_edges_8_1724_4_c:             52540.4  19739.2  ( 2.66x)
> > draw_edges_8_1724_8_c:             45386.9  19847.4  ( 2.29x)
> > draw_edges_8_1724_16_c:            51995.4  23284.7  ( 2.23x)
> > draw_edges_128_407_4_c:            13401.1   6988.2  ( 1.92x)
> > draw_edges_128_407_8_c:            12218.4   7527.9  ( 1.62x)
> > draw_edges_128_407_16_c:           13695.9   8207.2  ( 1.67x)
> > draw_edges_1080_31_4_c:             3702.9   3110.4  ( 1.19x)
> > draw_edges_1080_31_8_c:             6015.6   5643.2  ( 1.07x)
> > draw_edges_1080_31_16_c:           12281.9  11901.4  ( 1.03x)
> > draw_edges_1920_4_4_c:              3957.9   3970.2  ( 1.00x)
> > draw_edges_1920_4_4_negstride_c:    3964.1   3825.2  ( 1.04x)
> > draw_edges_1920_4_8_c:              7757.9   7676.4  ( 1.01x)
> > draw_edges_1920_4_8_negstride_c:    7923.6   7812.4  ( 1.01x)
> > draw_edges_1920_4_16_c:            14791.6  15143.9  ( 0.98x)
> > draw_edges_1920_4_16_negstride_c:  14788.6  15163.4  ( 0.98x)
> >
> > A76:
> >                                     before   after
> > draw_edges_8_1724_4_c:             39786.0  4968.5  ( 8.01x)
> > draw_edges_8_1724_8_c:             32971.5  5069.5  ( 6.50x)
> > draw_edges_8_1724_16_c:            40056.0  6017.2  ( 6.66x)
> > draw_edges_128_407_4_c:             9517.2  1210.5  ( 7.86x)
> > draw_edges_128_407_8_c:             8035.7  1346.2  ( 5.97x)
> > draw_edges_128_407_16_c:            9946.5  1648.2  ( 6.03x)
> > draw_edges_1080_31_4_c:             1308.0   660.7  ( 1.98x)
> > draw_edges_1080_31_8_c:             1785.5  1270.7  ( 1.41x)
> > draw_edges_1080_31_16_c:            3266.7  2591.5  ( 1.26x)
> > draw_edges_1920_4_4_c:              1151.0  1090.7  ( 1.06x)
> > draw_edges_1920_4_4_negstride_c:    1153.7  1096.5  ( 1.05x)
> > draw_edges_1920_4_8_c:              2220.7  2186.5  ( 1.02x)
> > draw_edges_1920_4_8_negstride_c:    2218.5  2193.5  ( 1.01x)
> > draw_edges_1920_4_16_c:             4324.2  4230.0  ( 1.02x)
> > draw_edges_1920_4_16_negstride_c:   4310.7  4233.0  ( 1.02x)
> > ---
>
> Ping on the draw_edges patches in this patchset.
>
> I'll apply in a few days if there are no comments.

Applied.
diff mbox series

Patch

diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c
index 1091c94574..00a2c4ba71 100644
--- a/libavcodec/mpegvideoencdsp.c
+++ b/libavcodec/mpegvideoencdsp.c
@@ -114,19 +114,31 @@  static int pix_norm1_c(const uint8_t *pix, int line_size)
     return s;
 }
 
+static av_always_inline void draw_edges_lr(uint8_t *ptr, int wrap, int width, int height, int w)
+{
+    for (int i = 0; i < height; i++) {
+        memset(ptr - w, ptr[0], w);
+        memset(ptr + width, ptr[width - 1], w);
+        ptr += wrap;
+    }
+}
+
 /* draw the edges of width 'w' of an image of size width, height */
 // FIXME: Check that this is OK for MPEG-4 interlaced.
 static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
                            int w, int h, int sides)
 {
-    uint8_t *ptr = buf, *last_line;
+    uint8_t *last_line;
     int i;
 
     /* left and right */
-    for (i = 0; i < height; i++) {
-        memset(ptr - w, ptr[0], w);
-        memset(ptr + width, ptr[width - 1], w);
-        ptr += wrap;
+    if (w == 16) {
+        draw_edges_lr(buf, wrap, width, height, 16);
+    } else if (w == 8) {
+        draw_edges_lr(buf, wrap, width, height, 8);
+    } else {
+        av_assert1(w == 4);
+        draw_edges_lr(buf, wrap, width, height, 4);
     }
 
     /* top and bottom + corners */