diff mbox

[FFmpeg-devel] swscale: add unscaled copy from yuv420p10 to p010

Message ID 20160901152304.5689-1-timo@rothenpieler.org
State Superseded
Headers show

Commit Message

Timo Rothenpieler Sept. 1, 2016, 3:23 p.m. UTC
---
 libswscale/swscale_unscaled.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

Comments

Michael Niedermayer Sept. 1, 2016, 4:20 p.m. UTC | #1
On Thu, Sep 01, 2016 at 05:23:04PM +0200, Timo Rothenpieler wrote:
> ---
>  libswscale/swscale_unscaled.c | 39 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
> index b231abe..51768fa 100644
> --- a/libswscale/swscale_unscaled.c
> +++ b/libswscale/swscale_unscaled.c
> @@ -197,6 +197,40 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
>      return srcSliceH;
>  }
>  
> +static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
> +                               int srcStride[], int srcSliceY,
> +                               int srcSliceH, uint8_t *dstParam8[],
> +                               int dstStride[])
> +{
> +    uint16_t *src[] = {
> +        (uint16_t*)(src8[0] + srcStride[0] * srcSliceY),
> +        (uint16_t*)(src8[1] + srcStride[1] * srcSliceY),
> +        (uint16_t*)(src8[2] + srcStride[2] * srcSliceY)
> +    };
> +    uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY);
> +    uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2);
> +    int x, y;
> +
> +    for (y = srcSliceY; y < srcSliceY + srcSliceH; y++) {
> +        if (!(y & 1)) {
> +            for (x = 0; x < c->srcW / 2; x++) {
> +                dstUV[x*2  ] = src[1][x] << 6;
> +                dstUV[x*2+1] = src[2][x] << 6;
> +            }
> +            src[1] += srcStride[1] / 2;
> +            src[2] += srcStride[2] / 2;
> +            dstUV += dstStride[1] / 2;
> +        }
> +        for (x = 0; x < c->srcW; x++) {
> +            dstY[x] = src[0][x] << 6;
> +        }
> +        src[0] += srcStride[0] / 2;
> +        dstY += dstStride[0] / 2;
> +    }
> +
> +    return srcSliceH;
> +}

I think a check that the strides are multiples of 2 should be added,
unless that's already checked somewhere.
LGTM otherwise

[...]
Timo Rothenpieler Sept. 1, 2016, 4:44 p.m. UTC | #2
On 9/1/2016 6:20 PM, Michael Niedermayer wrote:
> On Thu, Sep 01, 2016 at 05:23:04PM +0200, Timo Rothenpieler wrote:
>> ---
>>  libswscale/swscale_unscaled.c | 39 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 39 insertions(+)
>>
>> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
>> index b231abe..51768fa 100644
>> --- a/libswscale/swscale_unscaled.c
>> +++ b/libswscale/swscale_unscaled.c
>> @@ -197,6 +197,40 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
>>      return srcSliceH;
>>  }
>>  
>> +static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
>> +                               int srcStride[], int srcSliceY,
>> +                               int srcSliceH, uint8_t *dstParam8[],
>> +                               int dstStride[])
>> +{
>> +    uint16_t *src[] = {
>> +        (uint16_t*)(src8[0] + srcStride[0] * srcSliceY),
>> +        (uint16_t*)(src8[1] + srcStride[1] * srcSliceY),
>> +        (uint16_t*)(src8[2] + srcStride[2] * srcSliceY)
>> +    };
>> +    uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY);
>> +    uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2);
>> +    int x, y;
>> +
>> +    for (y = srcSliceY; y < srcSliceY + srcSliceH; y++) {
>> +        if (!(y & 1)) {
>> +            for (x = 0; x < c->srcW / 2; x++) {
>> +                dstUV[x*2  ] = src[1][x] << 6;
>> +                dstUV[x*2+1] = src[2][x] << 6;
>> +            }
>> +            src[1] += srcStride[1] / 2;
>> +            src[2] += srcStride[2] / 2;
>> +            dstUV += dstStride[1] / 2;
>> +        }
>> +        for (x = 0; x < c->srcW; x++) {
>> +            dstY[x] = src[0][x] << 6;
>> +        }
>> +        src[0] += srcStride[0] / 2;
>> +        dstY += dstStride[0] / 2;
>> +    }
>> +
>> +    return srcSliceH;
>> +}
> 
> I think some check for strides to be a multiple of 2 should be added
> unless thats already checked somewhere
> LGTM otherwise

Is there really a way for them not to be a multiple of 2 with a 10-bit
format?

But adding some asserts probably won't hurt.
Michael Niedermayer Sept. 1, 2016, 6:01 p.m. UTC | #3
On Thu, Sep 01, 2016 at 06:44:56PM +0200, Timo Rothenpieler wrote:
> On 9/1/2016 6:20 PM, Michael Niedermayer wrote:
> > On Thu, Sep 01, 2016 at 05:23:04PM +0200, Timo Rothenpieler wrote:
> >> ---
> >>  libswscale/swscale_unscaled.c | 39 +++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 39 insertions(+)
> >>
> >> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
> >> index b231abe..51768fa 100644
> >> --- a/libswscale/swscale_unscaled.c
> >> +++ b/libswscale/swscale_unscaled.c
> >> @@ -197,6 +197,40 @@ static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
> >>      return srcSliceH;
> >>  }
> >>  
> >> +static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
> >> +                               int srcStride[], int srcSliceY,
> >> +                               int srcSliceH, uint8_t *dstParam8[],
> >> +                               int dstStride[])
> >> +{
> >> +    uint16_t *src[] = {
> >> +        (uint16_t*)(src8[0] + srcStride[0] * srcSliceY),
> >> +        (uint16_t*)(src8[1] + srcStride[1] * srcSliceY),
> >> +        (uint16_t*)(src8[2] + srcStride[2] * srcSliceY)
> >> +    };
> >> +    uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY);
> >> +    uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2);
> >> +    int x, y;
> >> +
> >> +    for (y = srcSliceY; y < srcSliceY + srcSliceH; y++) {
> >> +        if (!(y & 1)) {
> >> +            for (x = 0; x < c->srcW / 2; x++) {
> >> +                dstUV[x*2  ] = src[1][x] << 6;
> >> +                dstUV[x*2+1] = src[2][x] << 6;
> >> +            }
> >> +            src[1] += srcStride[1] / 2;
> >> +            src[2] += srcStride[2] / 2;
> >> +            dstUV += dstStride[1] / 2;
> >> +        }
> >> +        for (x = 0; x < c->srcW; x++) {
> >> +            dstY[x] = src[0][x] << 6;
> >> +        }
> >> +        src[0] += srcStride[0] / 2;
> >> +        dstY += dstStride[0] / 2;
> >> +    }
> >> +
> >> +    return srcSliceH;
> >> +}
> > 
> > I think some check for strides to be a multiple of 2 should be added
> > unless thats already checked somewhere
> > LGTM otherwise
> 
> Is there really a way for them to not be a multiple of 2 with a 10bit
> format?

Is there some code that stops a user from passing such linesizes?


[...]
diff mbox

Patch

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index b231abe..51768fa 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -197,6 +197,40 @@  static int nv12ToPlanarWrapper(SwsContext *c, const uint8_t *src[],
     return srcSliceH;
 }
 
+static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[],
+                               int srcStride[], int srcSliceY,
+                               int srcSliceH, uint8_t *dstParam8[],
+                               int dstStride[])
+{
+    uint16_t *src[] = {
+        (uint16_t*)(src8[0] + srcStride[0] * srcSliceY),
+        (uint16_t*)(src8[1] + srcStride[1] * srcSliceY),
+        (uint16_t*)(src8[2] + srcStride[2] * srcSliceY)
+    };
+    uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY);
+    uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / 2);
+    int x, y;
+
+    for (y = srcSliceY; y < srcSliceY + srcSliceH; y++) {
+        if (!(y & 1)) {
+            for (x = 0; x < c->srcW / 2; x++) {
+                dstUV[x*2  ] = src[1][x] << 6;
+                dstUV[x*2+1] = src[2][x] << 6;
+            }
+            src[1] += srcStride[1] / 2;
+            src[2] += srcStride[2] / 2;
+            dstUV += dstStride[1] / 2;
+        }
+        for (x = 0; x < c->srcW; x++) {
+            dstY[x] = src[0][x] << 6;
+        }
+        src[0] += srcStride[0] / 2;
+        dstY += dstStride[0] / 2;
+    }
+
+    return srcSliceH;
+}
+
 static int planarToYuy2Wrapper(SwsContext *c, const uint8_t *src[],
                                int srcStride[], int srcSliceY, int srcSliceH,
                                uint8_t *dstParam[], int dstStride[])
@@ -1600,6 +1634,11 @@  void ff_get_unscaled_swscale(SwsContext *c)
         !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
         c->swscale = ff_yuv2rgb_get_func_ptr(c);
     }
+    /* yuv420p10le_to_p010le */
+    if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUVA420P10) &&
+        dstFormat == AV_PIX_FMT_P010) {
+        c->swscale = planarToP010Wrapper;
+    }
 
     if (srcFormat == AV_PIX_FMT_YUV410P && !(dstH & 3) &&
         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&