diff mbox series

[FFmpeg-devel,1/4] swscale: add nv24/nv42 to yuv420p unscaled converter

Message ID 20240809112612.107000-1-ramiro.polla@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/4] swscale: add nv24/nv42 to yuv420p unscaled converter | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Ramiro Polla Aug. 9, 2024, 11:26 a.m. UTC
---
 libswscale/swscale_unscaled.c | 45 +++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

Comments

Martin Storsjö Aug. 16, 2024, 10:58 a.m. UTC | #1
On Thu, 15 Aug 2024, Ramiro Polla wrote:

> Thank you for the review. New patch attached.

Thanks - this looks very straightforward and nice now! Just one minor nit 
below:

> +        add             x4, x4, x5, sxtw            // src1 += srcPadding
> +        add             x9, x9, x5, sxtw            // src2 += srcPadding
> +        add             x0, x0, x1, sxtw            // dst1 += dstPadding1
> +        add             x2, x2, x3, sxtw            // dst2 += dstPadding2

Since you're doing sxtw, I would have expected to have the last register 
referenced as wN, not xN. I'd guess that some picky versions of assemblers 
could error out due to this, so it could be good to change that just to be 
safe.

Other than that, this looks extremely straightforward and nice.

// Martin
Ramiro Polla Aug. 22, 2024, 11:11 a.m. UTC | #2
On Fri, Aug 16, 2024 at 12:58 PM Martin Storsjö <martin@martin.st> wrote:
> On Thu, 15 Aug 2024, Ramiro Polla wrote:
> > Thank you for the review. New patch attached.
>
> Thanks - this looks very straightforward and nice now! Just one minor nit
> below:
>
> > +        add             x4, x4, x5, sxtw            // src1 += srcPadding
> > +        add             x9, x9, x5, sxtw            // src2 += srcPadding
> > +        add             x0, x0, x1, sxtw            // dst1 += dstPadding1
> > +        add             x2, x2, x3, sxtw            // dst2 += dstPadding2
>
> Since you're doing sxtw, I would have expected to have the last register
> referenced as wN, not xN. I'd guess that some picky versions of assemblers
> could error out due to this, so it could be good to change that just to be
> safe.

Thanks for the review. I fixed this locally.

If there are no further comments, I'll apply this patchset in a few days.
Ramiro Polla Aug. 26, 2024, 9:09 a.m. UTC | #3
On Thu, Aug 22, 2024 at 1:11 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Fri, Aug 16, 2024 at 12:58 PM Martin Storsjö <martin@martin.st> wrote:
> > On Thu, 15 Aug 2024, Ramiro Polla wrote:
> > > Thank you for the review. New patch attached.
> >
> > Thanks - this looks very straightforward and nice now! Just one minor nit
> > below:
> >
> > > +        add             x4, x4, x5, sxtw            // src1 += srcPadding
> > > +        add             x9, x9, x5, sxtw            // src2 += srcPadding
> > > +        add             x0, x0, x1, sxtw            // dst1 += dstPadding1
> > > +        add             x2, x2, x3, sxtw            // dst2 += dstPadding2
> >
> > Since you're doing sxtw, I would have expected to have the last register
> > referenced as wN, not xN. I'd guess that some picky versions of assemblers
> > could error out due to this, so it could be good to change that just to be
> > safe.
>
> Thanks for the review. I fixed this locally.
>
> If there are no further comments, I'll apply this patchset in a few days.

Applied.
diff mbox series

Patch

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index a5c9917799..239258ab8c 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -221,6 +221,48 @@  static int nv24ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
/**
 * Downsample one interleaved two-component plane into two separate planes,
 * halving the resolution in both directions.
 *
 * Every output sample is the truncated mean of a 2x2 block of source samples
 * of the same component. The component stored at even byte offsets of the
 * source is written to dst1, the one at odd byte offsets to dst2.
 *
 * @param dst1       destination for the component at even source offsets
 * @param dstStride1 byte distance between consecutive rows of dst1
 * @param dst2       destination for the component at odd source offsets
 * @param dstStride2 byte distance between consecutive rows of dst2
 * @param src        first source row (two bytes per source pixel)
 * @param srcStride  byte distance between consecutive source rows
 * @param w          number of output samples per row; 4 * w bytes are read
 *                   from each source row
 * @param h          number of source rows; (h + 1) / 2 output rows are written
 */
static void nv24_to_yuv420p_chroma(uint8_t *dst1, int dstStride1,
                                   uint8_t *dst2, int dstStride2,
                                   const uint8_t *src, int srcStride,
                                   int w, int h)
{
    // average 4 pixels into 1 (interleaved U and V)
    for (int y = 0; y < h; y += 2) {
        const uint8_t *src1 = src;
        // With an odd number of rows the last row has no partner below it.
        // Pair it with itself instead of reading one row past the input.
        const uint8_t *src2 = (y + 1 < h) ? src + srcStride : src;

        for (int x = 0; x < w; x++) {
            dst1[x] = (src1[4 * x + 0] + src1[4 * x + 2] +
                       src2[4 * x + 0] + src2[4 * x + 2]) >> 2;
            dst2[x] = (src1[4 * x + 1] + src1[4 * x + 3] +
                       src2[4 * x + 1] + src2[4 * x + 3]) >> 2;
        }
        src  += srcStride * 2;
        dst1 += dstStride1;
        dst2 += dstStride2;
    }
}
+
+static int nv24ToYuv420Wrapper(SwsContext *c, const uint8_t *src[],
+                               int srcStride[], int srcSliceY, int srcSliceH,
+                               uint8_t *dstParam[], int dstStride[])
+{
+    uint8_t *dst1 = dstParam[1] + dstStride[1] * srcSliceY / 2;
+    uint8_t *dst2 = dstParam[2] + dstStride[2] * srcSliceY / 2;
+
+    copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
+              dstParam[0], dstStride[0]);
+
+    if (c->srcFormat == AV_PIX_FMT_NV24)
+        nv24_to_yuv420p_chroma(dst1, dstStride[1], dst2, dstStride[2],
+                               src[1], srcStride[1], c->srcW / 2, srcSliceH);
+    else
+        nv24_to_yuv420p_chroma(dst2, dstStride[2], dst1, dstStride[1],
+                               src[1], srcStride[1], c->srcW / 2, srcSliceH);
+
+    return srcSliceH;
+}
+
 static int planarToP01xWrapper(SwsContext *c, const uint8_t *src8[],
                                int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t *dstParam8[],
@@ -2206,6 +2248,9 @@  void ff_get_unscaled_swscale(SwsContext *c)
         c->convert_unscaled = yuyvToYuv422Wrapper;
     if (srcFormat == AV_PIX_FMT_UYVY422 && dstFormat == AV_PIX_FMT_YUV422P)
         c->convert_unscaled = uyvyToYuv422Wrapper;
+    if (dstFormat == AV_PIX_FMT_YUV420P &&
+        (srcFormat == AV_PIX_FMT_NV24 || srcFormat == AV_PIX_FMT_NV42))
+        c->convert_unscaled = nv24ToYuv420Wrapper;
 
 #define isPlanarGray(x) (isGray(x) && (x) != AV_PIX_FMT_YA8 && (x) != AV_PIX_FMT_YA16LE && (x) != AV_PIX_FMT_YA16BE)
     /* simple copy */