diff mbox series

[FFmpeg-devel,5/8] sws: add a new scaling API

Message ID 20210712110709.15532-6-anton@khirnov.net
State New
Headers show
Series [FFmpeg-devel,1/8] tools/venc_data_dump: factor out demux/decode code | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Anton Khirnov July 12, 2021, 11:07 a.m. UTC
---
 libswscale/swscale.c          | 263 ++++++++++++++++++++++++++--------
 libswscale/swscale.h          |  80 +++++++++++
 libswscale/swscale_internal.h |  19 +++
 libswscale/utils.c            |  70 +++++++++
 4 files changed, 374 insertions(+), 58 deletions(-)

Comments

Michael Niedermayer July 12, 2021, 7:08 p.m. UTC | #1
On Mon, Jul 12, 2021 at 01:07:06PM +0200, Anton Khirnov wrote:
[...]
> diff --git a/libswscale/swscale.h b/libswscale/swscale.h
> index 50d6d46553..41eacd2dea 100644
> --- a/libswscale/swscale.h
> +++ b/libswscale/swscale.h
> @@ -30,6 +30,7 @@
>  #include <stdint.h>
>  
>  #include "libavutil/avutil.h"
> +#include "libavutil/frame.h"
>  #include "libavutil/log.h"
>  #include "libavutil/pixfmt.h"
>  #include "version.h"
> @@ -218,6 +219,85 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
>                const int srcStride[], int srcSliceY, int srcSliceH,
>                uint8_t *const dst[], const int dstStride[]);
>  
> +/**
> + * Scale source data from src and write the output to dst.
> + *
> + * This is merely a convenience wrapper around
> + * - sws_frame_start()
> + * - sws_send_slice(0, src->height)
> + * - sws_receive_slice(0, dst->height)
> + * - sws_frame_end()
> + *
> + * @param dst The destination frame. See documentation for sws_frame_start() for
> + *            more details.
> + * @param src The source frame.
> + *
> + * @return 0 on success, a negative AVERROR code on failure
> + */
> +int sws_scale_frame(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
> +
> +/**
> + * Initialize the scaling process for a given pair of source/destination frames.
> + * Must be called before any calls to sws_send_slice() and sws_receive_slice().
> + *
> + * This function will retain references to src and dst.
> + *
> + * @param dst The destination frame.
> + *
> + *            The data buffers may either be already allocated by the caller or
> + *            left clear, in which case they will be allocated by the scaler.
> + *            The latter may have performance advantages - e.g. in certain cases
> + *            some output planes may be references to input planes, rather than
> + *            copies.
> + *
> + *            Output data will be written into this frame in successful
> + *            sws_receive_slice() calls.
> + * @param src The source frame. The data buffers must be allocated, but the
> + *            frame data does not have to be ready at this point. Data
> + *            availability is then signalled by sws_send_slice().
> + * @return 0 on success, a negative AVERROR code on failure
> + *
> + * @see sws_frame_end()
> + */
> +int sws_frame_start(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
> +
> +/**
> + * Finish the scaling process for a pair of source/destination frames previously
> + * submitted with sws_frame_start(). Must be called after all sws_send_slice()
> + * and sws_receive_slice() calls are done, before any new sws_frame_start()
> + * calls.
> + */
> +void sws_frame_end(struct SwsContext *c);
> +

> +/**
> + * Indicate that a horizontal slice of input data is available in the source
> + * frame previously provided to sws_frame_start(). The slices may be provided in
> + * any order, but may not overlap. For vertically subsampled pixel formats, the
> + * slices must be aligned according to subsampling.
> + *
> + * @param slice_start first row of the slice
> + * @param slice_height number of rows in the slice
> + *
> + * @return 0 on success, a negative AVERROR code on failure.
> + */
> +int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
> +                   unsigned int slice_height);

I suggest using a non-zero return value on success.
That could then be extended in the future, for example to provide information
about how many lines have already been consumed so that their memory can be reused.

thx

[...]
Anton Khirnov July 13, 2021, 5:54 p.m. UTC | #2
Quoting Michael Niedermayer (2021-07-12 21:08:55)
> On Mon, Jul 12, 2021 at 01:07:06PM +0200, Anton Khirnov wrote:
> [...]
> > diff --git a/libswscale/swscale.h b/libswscale/swscale.h
> > index 50d6d46553..41eacd2dea 100644
> > --- a/libswscale/swscale.h
> > +++ b/libswscale/swscale.h
> > @@ -30,6 +30,7 @@
> >  #include <stdint.h>
> >  
> >  #include "libavutil/avutil.h"
> > +#include "libavutil/frame.h"
> >  #include "libavutil/log.h"
> >  #include "libavutil/pixfmt.h"
> >  #include "version.h"
> > @@ -218,6 +219,85 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
> >                const int srcStride[], int srcSliceY, int srcSliceH,
> >                uint8_t *const dst[], const int dstStride[]);
> >  
> > +/**
> > + * Scale source data from src and write the output to dst.
> > + *
> > + * This is merely a convenience wrapper around
> > + * - sws_frame_start()
> > + * - sws_send_slice(0, src->height)
> > + * - sws_receive_slice(0, dst->height)
> > + * - sws_frame_end()
> > + *
> > + * @param dst The destination frame. See documentation for sws_frame_start() for
> > + *            more details.
> > + * @param src The source frame.
> > + *
> > + * @return 0 on success, a negative AVERROR code on failure
> > + */
> > +int sws_scale_frame(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
> > +
> > +/**
> > + * Initialize the scaling process for a given pair of source/destination frames.
> > + * Must be called before any calls to sws_send_slice() and sws_receive_slice().
> > + *
> > + * This function will retain references to src and dst.
> > + *
> > + * @param dst The destination frame.
> > + *
> > + *            The data buffers may either be already allocated by the caller or
> > + *            left clear, in which case they will be allocated by the scaler.
> > + *            The latter may have performance advantages - e.g. in certain cases
> > + *            some output planes may be references to input planes, rather than
> > + *            copies.
> > + *
> > + *            Output data will be written into this frame in successful
> > + *            sws_receive_slice() calls.
> > + * @param src The source frame. The data buffers must be allocated, but the
> > + *            frame data does not have to be ready at this point. Data
> > + *            availability is then signalled by sws_send_slice().
> > + * @return 0 on success, a negative AVERROR code on failure
> > + *
> > + * @see sws_frame_end()
> > + */
> > +int sws_frame_start(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
> > +
> > +/**
> > + * Finish the scaling process for a pair of source/destination frames previously
> > + * submitted with sws_frame_start(). Must be called after all sws_send_slice()
> > + * and sws_receive_slice() calls are done, before any new sws_frame_start()
> > + * calls.
> > + */
> > +void sws_frame_end(struct SwsContext *c);
> > +
> 
> > +/**
> > + * Indicate that a horizontal slice of input data is available in the source
> > + * frame previously provided to sws_frame_start(). The slices may be provided in
> > + * any order, but may not overlap. For vertically subsampled pixel formats, the
> > + * slices must be aligned according to subsampling.
> > + *
> > + * @param slice_start first row of the slice
> > + * @param slice_height number of rows in the slice
> > + *
> > + * @return 0 on success, a negative AVERROR code on failure.
> > + */
> > +int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
> > +                   unsigned int slice_height);
> 
> I suggest using a non-zero return value on success.

Outright >0, or >= 0?

> That could then be extended in the future, for example to provide information
> about how many lines have already been consumed so that their memory can be reused.

I will amend the patch.

Are you satisfied with the API otherwise?
Michael Niedermayer July 14, 2021, 5:30 p.m. UTC | #3
On Tue, Jul 13, 2021 at 07:54:18PM +0200, Anton Khirnov wrote:
> Quoting Michael Niedermayer (2021-07-12 21:08:55)
> > On Mon, Jul 12, 2021 at 01:07:06PM +0200, Anton Khirnov wrote:
> > [...]
> > > diff --git a/libswscale/swscale.h b/libswscale/swscale.h
> > > index 50d6d46553..41eacd2dea 100644
> > > --- a/libswscale/swscale.h
> > > +++ b/libswscale/swscale.h
> > > @@ -30,6 +30,7 @@
> > >  #include <stdint.h>
> > >  
> > >  #include "libavutil/avutil.h"
> > > +#include "libavutil/frame.h"
> > >  #include "libavutil/log.h"
> > >  #include "libavutil/pixfmt.h"
> > >  #include "version.h"
> > > @@ -218,6 +219,85 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
> > >                const int srcStride[], int srcSliceY, int srcSliceH,
> > >                uint8_t *const dst[], const int dstStride[]);
> > >  
> > > +/**
> > > + * Scale source data from src and write the output to dst.
> > > + *
> > > + * This is merely a convenience wrapper around
> > > + * - sws_frame_start()
> > > + * - sws_send_slice(0, src->height)
> > > + * - sws_receive_slice(0, dst->height)
> > > + * - sws_frame_end()
> > > + *
> > > + * @param dst The destination frame. See documentation for sws_frame_start() for
> > > + *            more details.
> > > + * @param src The source frame.
> > > + *
> > > + * @return 0 on success, a negative AVERROR code on failure
> > > + */
> > > +int sws_scale_frame(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
> > > +
> > > +/**
> > > + * Initialize the scaling process for a given pair of source/destination frames.
> > > + * Must be called before any calls to sws_send_slice() and sws_receive_slice().
> > > + *
> > > + * This function will retain references to src and dst.
> > > + *
> > > + * @param dst The destination frame.
> > > + *
> > > + *            The data buffers may either be already allocated by the caller or
> > > + *            left clear, in which case they will be allocated by the scaler.
> > > + *            The latter may have performance advantages - e.g. in certain cases
> > > + *            some output planes may be references to input planes, rather than
> > > + *            copies.
> > > + *
> > > + *            Output data will be written into this frame in successful
> > > + *            sws_receive_slice() calls.
> > > + * @param src The source frame. The data buffers must be allocated, but the
> > > + *            frame data does not have to be ready at this point. Data
> > > + *            availability is then signalled by sws_send_slice().
> > > + * @return 0 on success, a negative AVERROR code on failure
> > > + *
> > > + * @see sws_frame_end()
> > > + */
> > > +int sws_frame_start(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
> > > +
> > > +/**
> > > + * Finish the scaling process for a pair of source/destination frames previously
> > > + * submitted with sws_frame_start(). Must be called after all sws_send_slice()
> > > + * and sws_receive_slice() calls are done, before any new sws_frame_start()
> > > + * calls.
> > > + */
> > > +void sws_frame_end(struct SwsContext *c);
> > > +
> > 
> > > +/**
> > > + * Indicate that a horizontal slice of input data is available in the source
> > > + * frame previously provided to sws_frame_start(). The slices may be provided in
> > > + * any order, but may not overlap. For vertically subsampled pixel formats, the
> > > + * slices must be aligned according to subsampling.
> > > + *
> > > + * @param slice_start first row of the slice
> > > + * @param slice_height number of rows in the slice
> > > + *
> > > + * @return 0 on success, a negative AVERROR code on failure.
> > > + */
> > > +int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
> > > +                   unsigned int slice_height);
> > 
> > I suggest using a non-zero return value on success.
> 
> Outright >0, or >= 0?

I meant >= 0, i.e. non-negative.
 

> 
> > That could then be extended in the future, for example to provide information
> > about how many lines have already been consumed so that their memory can be reused.
> 
> I will amend the patch.
> 
> Are you satisfied with the API otherwise?

Yes, the API in this patch looks nice. I haven't looked over all the other code yet.

Thanks

[...]
diff mbox series

Patch

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 61dfcb4dff..8b32ce5a40 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -236,13 +236,16 @@  static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
         av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
 static int swscale(SwsContext *c, const uint8_t *src[],
-                   int srcStride[], int srcSliceY,
-                   int srcSliceH, uint8_t *dst[], int dstStride[])
+                   int srcStride[], int srcSliceY, int srcSliceH,
+                   uint8_t *dst[], int dstStride[],
+                   int dstSliceY, int dstSliceH)
 {
+    const int scale_dst = dstSliceY > 0 || dstSliceH < c->dstH;
+
     /* load a few things into local vars to make the code more readable?
      * and faster */
     const int dstW                   = c->dstW;
-    const int dstH                   = c->dstH;
+    int dstH                         = c->dstH;
 
     const enum AVPixelFormat dstFormat = c->dstFormat;
     const int flags                  = c->flags;
@@ -331,10 +334,15 @@  static int swscale(SwsContext *c, const uint8_t *src[],
         }
     }
 
-    /* Note the user might start scaling the picture in the middle so this
-     * will not get executed. This is not really intended but works
-     * currently, so people might do it. */
-    if (srcSliceY == 0) {
+    if (scale_dst) {
+        dstY         = dstSliceY;
+        dstH         = dstY + dstSliceH;
+        lastInLumBuf = -1;
+        lastInChrBuf = -1;
+    } else if (srcSliceY == 0) {
+        /* Note the user might start scaling the picture in the middle so this
+         * will not get executed. This is not really intended but works
+         * currently, so people might do it. */
         dstY         = 0;
         lastInLumBuf = -1;
         lastInChrBuf = -1;
@@ -352,8 +360,8 @@  static int swscale(SwsContext *c, const uint8_t *src[],
             srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
 
     ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
-            dstY, dstH, dstY >> c->chrDstVSubSample,
-            AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
+            dstY, dstSliceH, dstY >> c->chrDstVSubSample,
+            AV_CEIL_RSHIFT(dstSliceH, c->chrDstVSubSample), scale_dst);
     if (srcSliceY == 0) {
         hout_slice->plane[0].sliceY = lastInLumBuf + 1;
         hout_slice->plane[1].sliceY = lastInChrBuf + 1;
@@ -373,7 +381,7 @@  static int swscale(SwsContext *c, const uint8_t *src[],
 
         // First line needed as input
         const int firstLumSrcY  = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
-        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
+        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), c->dstH - 1)]);
         // First line needed as input
         const int firstChrSrcY  = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
 
@@ -477,7 +485,7 @@  static int swscale(SwsContext *c, const uint8_t *src[],
             c->chrDither8 = ff_dither_8x8_128[chrDstY & 7];
             c->lumDither8 = ff_dither_8x8_128[dstY    & 7];
         }
-        if (dstY >= dstH - 2) {
+        if (dstY >= c->dstH - 2) {
             /* hmm looks like we can't use MMX here without overwriting
              * this array's tail */
             ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
@@ -491,21 +499,22 @@  static int swscale(SwsContext *c, const uint8_t *src[],
             desc[i].process(c, &desc[i], dstY, 1);
     }
     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !needAlpha) {
+        int offset = lastDstY - dstSliceY;
         int length = dstW;
         int height = dstY - lastDstY;
 
         if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
             const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
-            fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
+            fillPlane16(dst[3], dstStride[3], length, height, offset,
                     1, desc->comp[3].depth,
                     isBE(dstFormat));
         } else if (is32BPS(dstFormat)) {
             const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
-            fillPlane32(dst[3], dstStride[3], length, height, lastDstY,
+            fillPlane32(dst[3], dstStride[3], length, height, offset,
                     1, desc->comp[3].depth,
                     isBE(dstFormat), desc->flags & AV_PIX_FMT_FLAG_FLOAT);
         } else
-            fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
+            fillPlane(dst[3], dstStride[3], length, height, offset, 255);
     }
 
 #if HAVE_MMXEXT_INLINE
@@ -809,33 +818,42 @@  static void update_palette(SwsContext *c, const uint32_t *pal)
     }
 }
 
+static int scale_internal(SwsContext *c,
+                          const uint8_t * const srcSlice[], const int srcStride[],
+                          int srcSliceY, int srcSliceH,
+                          uint8_t *const dstSlice[], const int dstStride[],
+                          int dstSliceY, int dstSliceH);
+
 static int scale_gamma(SwsContext *c,
                        const uint8_t * const srcSlice[], const int srcStride[],
                        int srcSliceY, int srcSliceH,
-                       uint8_t * const dst[], const int dstStride[])
+                       uint8_t * const dstSlice[], const int dstStride[],
+                       int dstSliceY, int dstSliceH)
 {
-    int ret = sws_scale(c->cascaded_context[0],
-                        srcSlice, srcStride, srcSliceY, srcSliceH,
-                        c->cascaded_tmp, c->cascaded_tmpStride);
+    int ret = scale_internal(c->cascaded_context[0],
+                             srcSlice, srcStride, srcSliceY, srcSliceH,
+                             c->cascaded_tmp, c->cascaded_tmpStride, 0, c->srcH);
 
     if (ret < 0)
         return ret;
 
     if (c->cascaded_context[2])
-        ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
-                        c->cascaded_tmpStride, srcSliceY, srcSliceH, c->cascaded1_tmp,
-                        c->cascaded1_tmpStride);
+        ret = scale_internal(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
+                             c->cascaded_tmpStride, srcSliceY, srcSliceH,
+                             c->cascaded1_tmp, c->cascaded1_tmpStride, 0, c->dstH);
     else
-        ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
-                        c->cascaded_tmpStride, srcSliceY, srcSliceH, dst, dstStride);
+        ret = scale_internal(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
+                             c->cascaded_tmpStride, srcSliceY, srcSliceH,
+                             dstSlice, dstStride, dstSliceY, dstSliceH);
 
     if (ret < 0)
         return ret;
 
     if (c->cascaded_context[2]) {
-        ret = sws_scale(c->cascaded_context[2], (const uint8_t * const *)c->cascaded1_tmp,
-                        c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret,
-                        c->cascaded_context[1]->dstY, dst, dstStride);
+        ret = scale_internal(c->cascaded_context[2], (const uint8_t * const *)c->cascaded1_tmp,
+                             c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret,
+                             c->cascaded_context[1]->dstY,
+                             dstSlice, dstStride, dstSliceY, dstSliceH);
     }
     return ret;
 }
@@ -843,56 +861,64 @@  static int scale_gamma(SwsContext *c,
 static int scale_cascaded(SwsContext *c,
                           const uint8_t * const srcSlice[], const int srcStride[],
                           int srcSliceY, int srcSliceH,
-                          uint8_t * const dst[], const int dstStride[])
+                          uint8_t * const dstSlice[], const int dstStride[],
+                          int dstSliceY, int dstSliceH)
 {
-    int ret = sws_scale(c->cascaded_context[0],
-                        srcSlice, srcStride, srcSliceY, srcSliceH,
-                        c->cascaded_tmp, c->cascaded_tmpStride);
+    int ret = scale_internal(c->cascaded_context[0],
+                             srcSlice, srcStride, srcSliceY, srcSliceH,
+                             c->cascaded_tmp, c->cascaded_tmpStride,
+                             0, c->cascaded_context[0]->dstH);
     if (ret < 0)
         return ret;
-    ret = sws_scale(c->cascaded_context[1],
-                    (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride,
-                    0, c->cascaded_context[0]->dstH, dst, dstStride);
+    ret = scale_internal(c->cascaded_context[1],
+                         (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride,
+                         0, c->cascaded_context[0]->dstH,
+                         dstSlice, dstStride, dstSliceY, dstSliceH);
     return ret;
 }
 
-/**
- * swscale wrapper, so we don't need to export the SwsContext.
- * Assumes planar YUV to be in YUV order instead of YVU.
- */
-int attribute_align_arg sws_scale(struct SwsContext *c,
-                                  const uint8_t * const srcSlice[],
-                                  const int srcStride[], int srcSliceY,
-                                  int srcSliceH, uint8_t *const dst[],
-                                  const int dstStride[])
+static int scale_internal(SwsContext *c,
+                          const uint8_t * const srcSlice[], const int srcStride[],
+                          int srcSliceY, int srcSliceH,
+                          uint8_t *const dstSlice[], const int dstStride[],
+                          int dstSliceY, int dstSliceH)
 {
-    const int frame_start = !c->sliceDir;
+    const int scale_dst = dstSliceY > 0 || dstSliceH < c->dstH;
+    const int frame_start = scale_dst || !c->sliceDir;
     int i, ret;
     const uint8_t *src2[4];
     uint8_t *dst2[4];
-    int macro_height = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
+    int macro_height_src = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
+    int macro_height_dst = isBayer(c->dstFormat) ? 2 : (1 << c->chrDstVSubSample);
     // copy strides, so they can safely be modified
     int srcStride2[4];
     int dstStride2[4];
     int srcSliceY_internal = srcSliceY;
 
-    if (!srcStride || !dstStride || !dst || !srcSlice) {
+    if (!srcStride || !dstStride || !dstSlice || !srcSlice) {
         av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n");
         return AVERROR(EINVAL);
     }
 
-    if ((srcSliceY & (macro_height-1)) ||
-        ((srcSliceH& (macro_height-1)) && srcSliceY + srcSliceH != c->srcH) ||
+    if ((srcSliceY  & (macro_height_src - 1)) ||
+        ((srcSliceH & (macro_height_src - 1)) && srcSliceY + srcSliceH != c->srcH) ||
         srcSliceY + srcSliceH > c->srcH) {
         av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", srcSliceY, srcSliceH);
         return AVERROR(EINVAL);
     }
 
+    if ((dstSliceY  & (macro_height_dst - 1)) ||
+        ((dstSliceH & (macro_height_dst - 1)) && dstSliceY + dstSliceH != c->dstH) ||
+        dstSliceY + dstSliceH > c->dstH) {
+        av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", dstSliceY, dstSliceH);
+        return AVERROR(EINVAL);
+    }
+
     if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
         av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
         return AVERROR(EINVAL);
     }
-    if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
+    if (!check_image_pointers((const uint8_t* const*)dstSlice, c->dstFormat, dstStride)) {
         av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
         return AVERROR(EINVAL);
     }
@@ -902,10 +928,12 @@  int attribute_align_arg sws_scale(struct SwsContext *c,
         return 0;
 
     if (c->gamma_flag && c->cascaded_context[0])
-        return scale_gamma(c, srcSlice, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+        return scale_gamma(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+                           dstSlice, dstStride, dstSliceY, dstSliceH);
 
     if (c->cascaded_context[0] && srcSliceY == 0 && srcSliceH == c->cascaded_context[0]->srcH)
-        return scale_cascaded(c, srcSlice, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+        return scale_cascaded(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+                              dstSlice, dstStride, dstSliceY, dstSliceH);
 
     if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0])
         for (i = 0; i < 4; i++)
@@ -915,18 +943,19 @@  int attribute_align_arg sws_scale(struct SwsContext *c,
         update_palette(c, (const uint32_t *)srcSlice[1]);
 
     memcpy(src2,       srcSlice,  sizeof(src2));
-    memcpy(dst2,       dst,       sizeof(dst2));
+    memcpy(dst2,       dstSlice,  sizeof(dst2));
     memcpy(srcStride2, srcStride, sizeof(srcStride2));
     memcpy(dstStride2, dstStride, sizeof(dstStride2));
 
-    if (frame_start) {
+    if (frame_start && !scale_dst) {
         if (srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
             av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
             return AVERROR(EINVAL);
         }
 
         c->sliceDir = (srcSliceY == 0) ? 1 : -1;
-    }
+    } else if (scale_dst)
+        c->sliceDir = 1;
 
     if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
         uint8_t *base;
@@ -985,11 +1014,28 @@  int attribute_align_arg sws_scale(struct SwsContext *c,
     reset_ptr(src2, c->srcFormat);
     reset_ptr((void*)dst2, c->dstFormat);
 
-    if (c->convert_unscaled)
-        ret = c->convert_unscaled(c, src2, srcStride2, srcSliceY_internal, srcSliceH,
+    if (c->convert_unscaled) {
+        int offset  = srcSliceY_internal;
+        int slice_h = srcSliceH;
+
+        // for dst slice scaling, offset the src pointers to match the dst slice
+        if (scale_dst) {
+            av_assert0(offset == 0);
+            for (i = 0; i < 4 && src2[i]; i++) {
+                if (!src2[i] || (i > 0 && usePal(c->srcFormat)))
+                    break;
+                src2[i] += (dstSliceY >> ((i == 1 || i == 2) ? c->chrSrcVSubSample : 0)) * srcStride2[i];
+            }
+            offset  = 0;
+            slice_h = dstSliceH;
+        }
+
+        ret = c->convert_unscaled(c, src2, srcStride2, offset, slice_h,
                                   dst2, dstStride2);
-    else
-        ret = swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH, dst2, dstStride2);
+    } else {
+        ret = swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH,
+                      dst2, dstStride2, dstSliceY, dstSliceH);
+    }
 
     if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
         int dstY = c->dstY ? c->dstY : srcSliceY + srcSliceH;
@@ -1003,8 +1049,109 @@  int attribute_align_arg sws_scale(struct SwsContext *c,
     }
 
     /* reset slice direction at end of frame */
-    if (srcSliceY_internal + srcSliceH == c->srcH)
+    if ((srcSliceY_internal + srcSliceH == c->srcH) || scale_dst)
         c->sliceDir = 0;
 
     return ret;
 }
+
+void sws_frame_end(struct SwsContext *c)
+{
+    av_frame_unref(c->frame_src);
+    av_frame_unref(c->frame_dst);
+    c->src_ranges.nb_ranges = 0;
+}
+
+int sws_frame_start(struct SwsContext *c, AVFrame *dst, const AVFrame *src)
+{
+    int ret, allocated = 0;
+
+    ret = av_frame_ref(c->frame_src, src);
+    if (ret < 0)
+        return ret;
+
+    if (!dst->buf[0]) {
+        dst->width  = c->dstW;
+        dst->height = c->dstH;
+        dst->format = c->dstFormat;
+
+        ret = av_frame_get_buffer(dst, 0);
+        if (ret < 0)
+            return ret;
+        allocated = 1;
+    }
+
+    ret = av_frame_ref(c->frame_dst, dst);
+    if (ret < 0) {
+        if (allocated)
+            av_frame_unref(dst);
+
+        return ret;
+    }
+
+    return 0;
+}
+
+int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
+                   unsigned int slice_height)
+{
+    int ret;
+
+    ret = ff_range_add(&c->src_ranges, slice_start, slice_height);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+int sws_receive_slice(struct SwsContext *c, unsigned int slice_start,
+                      unsigned int slice_height)
+{
+    uint8_t *dst[4];
+
+    /* wait until complete input has been received */
+    if (!(c->src_ranges.nb_ranges == 1        &&
+          c->src_ranges.ranges[0].start == 0 &&
+          c->src_ranges.ranges[0].len == c->srcH))
+        return AVERROR(EAGAIN);
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(dst) && c->frame_dst->data[i]; i++) {
+        dst[i] = c->frame_dst->data[i] +
+                 c->frame_dst->linesize[i] * (slice_start >> c->chrDstVSubSample);
+    }
+
+    return scale_internal(c, (const uint8_t * const *)c->frame_src->data,
+                          c->frame_src->linesize, 0, c->srcH,
+                          dst, c->frame_dst->linesize, slice_start, slice_height);
+}
+
+int sws_scale_frame(struct SwsContext *c, AVFrame *dst, const AVFrame *src)
+{
+    int ret;
+
+    ret = sws_frame_start(c, dst, src);
+    if (ret < 0)
+        return ret;
+
+    ret = sws_send_slice(c, 0, src->height);
+    if (ret >= 0)
+        ret = sws_receive_slice(c, 0, dst->height);
+
+    sws_frame_end(c);
+
+    return ret;
+}
+
+/**
+ * swscale wrapper, so we don't need to export the SwsContext.
+ * Assumes planar YUV to be in YUV order instead of YVU.
+ */
+int attribute_align_arg sws_scale(struct SwsContext *c,
+                                  const uint8_t * const srcSlice[],
+                                  const int srcStride[], int srcSliceY,
+                                  int srcSliceH, uint8_t *const dst[],
+                                  const int dstStride[])
+{
+    return scale_internal(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+                          dst, dstStride, 0, c->dstH);
+}
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 50d6d46553..41eacd2dea 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -30,6 +30,7 @@ 
 #include <stdint.h>
 
 #include "libavutil/avutil.h"
+#include "libavutil/frame.h"
 #include "libavutil/log.h"
 #include "libavutil/pixfmt.h"
 #include "version.h"
@@ -218,6 +219,85 @@  int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
               const int srcStride[], int srcSliceY, int srcSliceH,
               uint8_t *const dst[], const int dstStride[]);
 
+/**
+ * Scale source data from src and write the output to dst.
+ *
+ * This is merely a convenience wrapper around
+ * - sws_frame_start()
+ * - sws_send_slice(0, src->height)
+ * - sws_receive_slice(0, dst->height)
+ * - sws_frame_end()
+ *
+ * Once sws_frame_start() has succeeded, sws_frame_end() is called even when
+ * sending or receiving the slice fails.
+ *
+ * @param c   The scaling context.
+ * @param dst The destination frame. See documentation for sws_frame_start() for
+ *            more details.
+ * @param src The source frame.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int sws_scale_frame(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
+
+/**
+ * Initialize the scaling process for a given pair of source/destination frames.
+ * Must be called before any calls to sws_send_slice() and sws_receive_slice().
+ *
+ * This function will retain references to src and dst.
+ *
+ * @param c   The scaling context.
+ * @param dst The destination frame.
+ *
+ *            The data buffers may either be already allocated by the caller or
+ *            left clear, in which case they will be allocated by the scaler.
+ *            The latter may have performance advantages - e.g. in certain cases
+ *            some output planes may be references to input planes, rather than
+ *            copies.
+ *
+ *            Output data will be written into this frame in successful
+ *            sws_receive_slice() calls.
+ * @param src The source frame. The data buffers must be allocated, but the
+ *            frame data does not have to be ready at this point. Data
+ *            availability is then signalled by sws_send_slice().
+ * @return 0 on success, a negative AVERROR code on failure
+ *
+ * @see sws_frame_end()
+ */
+int sws_frame_start(struct SwsContext *c, AVFrame *dst, const AVFrame *src);
+
+/**
+ * Finish the scaling process for a pair of source/destination frames previously
+ * submitted with sws_frame_start(). Must be called after all sws_send_slice()
+ * and sws_receive_slice() calls are done, before any new sws_frame_start()
+ * calls.
+ *
+ * @param c The scaling context the frames were submitted to.
+ */
+void sws_frame_end(struct SwsContext *c);
+
+/**
+ * Indicate that a horizontal slice of input data is available in the source
+ * frame previously provided to sws_frame_start(). The slices may be provided in
+ * any order, but may not overlap. For vertically subsampled pixel formats, the
+ * slices must be aligned according to subsampling.
+ *
+ * @param c            The scaling context.
+ * @param slice_start  first row of the slice in the source frame
+ * @param slice_height number of rows in the slice
+ *
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_send_slice(struct SwsContext *c, unsigned int slice_start,
+                   unsigned int slice_height);
+
+/**
+ * Request a horizontal slice of the output data to be written into the frame
+ * previously provided to sws_frame_start().
+ *
+ * The implementation in this patch only produces output once the complete
+ * input has been received, and returns AVERROR(EAGAIN) until then.
+ *
+ * @param c            The scaling context.
+ * @param slice_start  first row of the slice
+ * @param slice_height number of rows in the slice
+ *
+ * @return 0 if the data was successfully written into the output
+ *         AVERROR(EAGAIN) if more input data needs to be provided before the
+ *                         output can be produced
+ *         another negative AVERROR code on other kinds of scaling failure
+ */
+int sws_receive_slice(struct SwsContext *c, unsigned int slice_start,
+                      unsigned int slice_height);
+
 /**
  * @param dstRange flag indicating the while-black range of the output (1=jpeg / 0=mpeg)
  * @param srcRange flag indicating the while-black range of the input (1=jpeg / 0=mpeg)
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 673407636a..c1098d6026 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -27,6 +27,7 @@ 
 #include "libavutil/avassert.h"
 #include "libavutil/avutil.h"
 #include "libavutil/common.h"
+#include "libavutil/frame.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "libavutil/mem_internal.h"
@@ -80,6 +81,19 @@  typedef enum SwsAlphaBlend {
     SWS_ALPHA_BLEND_NB,
 } SwsAlphaBlend;
 
+/* A contiguous interval covering [start, start + len). */
+typedef struct Range {
+    unsigned int start;
+    unsigned int len;
+} Range;
+
+/*
+ * Growable array of ranges, filled through ff_range_add().
+ *
+ * ranges_allocated is handed to av_fast_realloc() as its size argument, which
+ * is an unsigned int *, so the field has to be an unsigned int as well.
+ */
+typedef struct RangeList {
+    Range          *ranges;           // array holding nb_ranges valid entries
+    unsigned int nb_ranges;           // number of entries currently in use
+    unsigned int    ranges_allocated; // allocated size of ranges, in bytes
+} RangeList;
+
+/**
+ * Add the range [start, start + len) to a range list.
+ *
+ * The list is kept ordered by range start. Existing ranges that directly touch
+ * the new one are intended to be merged with it.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the new range overlaps an existing
+ *         one, AVERROR(ENOMEM) if the list could not be enlarged
+ */
+int ff_range_add(RangeList *r, unsigned int start, unsigned int len);
+
 typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t *dst[], int dstStride[]);
@@ -313,6 +327,11 @@  typedef struct SwsContext {
     int sliceDir;                 ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
     double param[2];              ///< Input parameters for scaling algorithms that need them.
 
+    AVFrame *frame_src;
+    AVFrame *frame_dst;
+
+    RangeList src_ranges;
+
     /* The cascaded_* fields allow spliting a scaler task into multiple
      * sequential steps, this is for example used to limit the maximum
      * downscaling factor that needs to be supported in one scaler.
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 176fc6fd63..dbb907d761 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1761,6 +1761,11 @@  av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         if (!FF_ALLOCZ_TYPED_ARRAY(c->dither_error[i], c->dstW + 2))
             goto nomem;
 
+    c->frame_src = av_frame_alloc();
+    c->frame_dst = av_frame_alloc();
+    if (!c->frame_src || !c->frame_dst)
+        goto nomem;
+
     c->needAlpha = (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) ? 1 : 0;
 
     // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
@@ -2250,6 +2255,11 @@  void sws_freeContext(SwsContext *c)
     for (i = 0; i < 4; i++)
         av_freep(&c->dither_error[i]);
 
+    av_frame_free(&c->frame_src);
+    av_frame_free(&c->frame_dst);
+
+    av_freep(&c->src_ranges.ranges);
+
     av_freep(&c->vLumFilter);
     av_freep(&c->vChrFilter);
     av_freep(&c->hLumFilter);
@@ -2364,3 +2374,63 @@  struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
     }
     return context;
 }
+
+/**
+ * Add the range [start, start + len) to a list of disjoint ranges.
+ *
+ * The list is kept ordered by range start. After insertion the new range is
+ * merged with the preceding and/or following range when they are directly
+ * contiguous, so that a fully covered interval ends up as a single entry.
+ *
+ * @param rl    list to update
+ * @param start first element covered by the new range
+ * @param len   number of elements covered by the new range
+ *
+ * @return 0 on success,
+ *         AVERROR(EINVAL) if the range wraps around or overlaps an existing one,
+ *         AVERROR(ENOMEM) if the list could not be enlarged
+ */
+int ff_range_add(RangeList *rl, unsigned int start, unsigned int len)
+{
+    /* av_fast_realloc() expects an unsigned int *; go through a local so the
+     * call is well-typed and the stored size only changes on success */
+    unsigned int allocated = rl->ranges_allocated;
+    Range *tmp;
+    unsigned int idx;
+
+    /* a range whose end wraps around cannot be compared against the others */
+    if (start + len < start)
+        return AVERROR(EINVAL);
+
+    /* find the first existing range after the new one */
+    for (idx = 0; idx < rl->nb_ranges; idx++)
+        if (rl->ranges[idx].start > start)
+            break;
+
+    /* check for overlap */
+    if (idx > 0) {
+        Range *prev = &rl->ranges[idx - 1];
+        if (prev->start + prev->len > start)
+            return AVERROR(EINVAL);
+    }
+    if (idx < rl->nb_ranges) {
+        Range *next = &rl->ranges[idx];
+        if (start + len > next->start)
+            return AVERROR(EINVAL);
+    }
+
+    tmp = av_fast_realloc(rl->ranges, &allocated,
+                          (rl->nb_ranges + 1) * sizeof(*rl->ranges));
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    rl->ranges           = tmp;
+    rl->ranges_allocated = allocated;
+
+    /* open a gap at idx and store the new range there */
+    memmove(rl->ranges + idx + 1, rl->ranges + idx,
+            sizeof(*rl->ranges) * (rl->nb_ranges - idx));
+    rl->ranges[idx].start = start;
+    rl->ranges[idx].len   = len;
+    rl->nb_ranges++;
+
+    /* merge with the preceding range */
+    if (idx > 0) {
+        Range *prev = &rl->ranges[idx - 1];
+        Range *cur  = &rl->ranges[idx];
+        if (prev->start + prev->len == cur->start) {
+            prev->len += cur->len;
+            /* cur is now part of prev: remove cur by shifting the entries
+             * after it down by one, leaving the enlarged prev in place */
+            memmove(rl->ranges + idx, rl->ranges + idx + 1,
+                    sizeof(*rl->ranges) * (rl->nb_ranges - idx - 1));
+            rl->nb_ranges--;
+            idx--;
+        }
+    }
+    /* merge with the following range */
+    if (idx + 1 < rl->nb_ranges) {
+        Range *cur  = &rl->ranges[idx];
+        Range *next = &rl->ranges[idx + 1];
+        if (cur->start + cur->len == next->start) {
+            cur->len += next->len;
+            /* next is now part of cur: remove next, keeping the enlarged cur */
+            memmove(rl->ranges + idx + 1, rl->ranges + idx + 2,
+                    sizeof(*rl->ranges) * (rl->nb_ranges - idx - 2));
+            rl->nb_ranges--;
+        }
+    }
+
+    return 0;
+}