@@ -609,6 +609,7 @@ extern char *videotoolbox_pixfmt;
extern int filter_nbthreads;
extern int filter_complex_nbthreads;
+extern int filter_scale_nbthreads;
extern int vstats_version;
extern const AVIOInterruptCB int_cb;
@@ -1011,6 +1011,9 @@ int configure_filtergraph(FilterGraph *fg)
AVDictionaryEntry *e = NULL;
fg->graph->nb_threads = filter_nbthreads;
+#if HAVE_THREADS
+ fg->graph->sws_nbthreads = filter_scale_nbthreads;
+#endif
args[0] = 0;
while ((e = av_dict_get(ost->sws_dict, "", e,
@@ -109,6 +109,9 @@ int frame_bits_per_raw_sample = 0;
float max_error_rate = 2.0/3;
int filter_nbthreads = 0;
int filter_complex_nbthreads = 0;
+#if HAVE_THREADS
+int filter_scale_nbthreads = 0;
+#endif
int vstats_version = 2;
@@ -3497,6 +3500,10 @@ const OptionDef options[] = {
{ "disposition", OPT_STRING | HAS_ARG | OPT_SPEC |
OPT_OUTPUT, { .off = OFFSET(disposition) },
"disposition", "" },
+#if HAVE_THREADS
+ { "filter_scale_threads", HAS_ARG | OPT_INT, { &filter_scale_nbthreads },
+ "number of threads for scale filter" },
+#endif
{ "thread_queue_size", HAS_ARG | OPT_INT | OPT_OFFSET | OPT_EXPERT | OPT_INPUT,
{ .off = OFFSET(thread_queue_size) },
"set the maximum number of queued packets from the demuxer" },
@@ -422,6 +422,16 @@ struct AVFilterContext {
* configured.
*/
int extra_hw_frames;
+
+
+#if HAVE_THREADS
+ /**
+ * Number of threads to use for scaling
+ */
+ int sws_slice_nbthreads;
+
+#endif
+
};
/**
@@ -907,6 +917,14 @@ typedef struct AVFilterGraph {
int sink_links_count;
unsigned disable_auto_convert;
+
+#if HAVE_THREADS
+ /**
+ * Number of threads to use for scaling
+ */
+ int sws_nbthreads;
+#endif
+
} AVFilterGraph;
/**
@@ -257,6 +257,10 @@ static int graph_config_links(AVFilterGraph *graph, AVClass *log_ctx)
for (i = 0; i < graph->nb_filters; i++) {
filt = graph->filters[i];
+#if HAVE_THREADS
+ filt->sws_slice_nbthreads = graph->sws_nbthreads;
+#endif
+
if (!filt->nb_outputs) {
if ((ret = avfilter_config_links(filt)))
return ret;
@@ -299,6 +299,10 @@ static int config_props(AVFilterLink *outlink)
av_opt_set_int(*s, "sws_flags", scale->flags, 0);
av_opt_set_int(*s, "param0", scale->param[0], 0);
av_opt_set_int(*s, "param1", scale->param[1], 0);
+#if HAVE_THREADS
+ av_opt_set_int(*s, "sw_nbthreads", ctx->sws_slice_nbthreads, 0);
+#endif
+
if (scale->in_range != AVCOL_RANGE_UNSPECIFIED)
av_opt_set_int(*s, "src_range",
scale->in_range == AVCOL_RANGE_JPEG, 0);
@@ -80,6 +80,9 @@ static const AVOption swscale_options[] = {
{ "none", "ignore alpha", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_NONE}, INT_MIN, INT_MAX, VE, "alphablend" },
{ "uniform_color", "blend onto a uniform color", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_UNIFORM},INT_MIN, INT_MAX, VE, "alphablend" },
{ "checkerboard", "blend onto a checkerboard", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ALPHA_BLEND_CHECKERBOARD},INT_MIN, INT_MAX, VE, "alphablend" },
+#if HAVE_THREADS
+ { "sw_nbthreads", "Threads number for scaling", OFFSET(sw_nbthreads), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 128, VE },
+#endif
{ NULL }
};
@@ -288,8 +288,13 @@ int ff_init_filters(SwsContext * c)
c->slice = av_mallocz_array(sizeof(SwsSlice), c->numSlice);
- res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
- if (res < 0) goto cleanup;
+ if(!c->parent) {
+ res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
+ if (res < 0) goto cleanup;
+ }
+ else {
+ memcpy(&c->slice[0],&c->parent->slice[0],sizeof(SwsSlice));
+ }
for (i = 1; i < c->numSlice-2; ++i) {
res = alloc_slice(&c->slice[i], c->srcFormat, lumBufSize, chrBufSize, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
if (res < 0) goto cleanup;
@@ -306,8 +311,13 @@ int ff_init_filters(SwsContext * c)
// vertical scaler output
++i;
- res = alloc_slice(&c->slice[i], c->dstFormat, c->dstH, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0);
- if (res < 0) goto cleanup;
+ if(!c->parent) {
+ res = alloc_slice(&c->slice[i], c->dstFormat, c->dstH, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0);
+ if (res < 0) goto cleanup;
+ }
+ else {
+ memcpy(&c->slice[i],&c->parent->slice[i],sizeof(SwsSlice));
+ }
index = 0;
srcIdx = 0;
@@ -320,6 +330,10 @@ int ff_init_filters(SwsContext * c)
}
if (need_lum_conv) {
+#if HAVE_THREADS
+ /* Multithreading is not supported when luma conversion is needed */
+ c->sw_nbthreads = 0;
+#endif
res = ff_init_desc_fmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
if (res < 0) goto cleanup;
c->desc[index].alpha = c->needAlpha;
@@ -384,8 +398,14 @@ int ff_free_filters(SwsContext *c)
}
if (c->slice) {
- for (i = 0; i < c->numSlice; ++i)
- free_slice(&c->slice[i]);
+ if(c->parent) {
+ for (i = 1; i < c->numSlice-1; ++i)
+ free_slice(&c->slice[i]);
+ }
+ else {
+ for (i = 0; i < c->numSlice; ++i)
+ free_slice(&c->slice[i]);
+ }
av_freep(&c->slice);
}
return 0;
@@ -22,6 +22,7 @@
#include <math.h>
#include <stdio.h>
#include <string.h>
+#include <pthread.h>
#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
@@ -234,151 +235,51 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
if (DEBUG_SWSCALE_BUFFERS) \
av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
-static int swscale(SwsContext *c, const uint8_t *src[],
- int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t *dst[], int dstStride[])
+
+static void swscale_step(SwsContext *c)
{
- /* load a few things into local vars to make the code more readable?
- * and faster */
- const int dstW = c->dstW;
- const int dstH = c->dstH;
+ SwsContextStep *step = &c->step_param;
+ int dstY= step->dstY;
+ int dstHend= step->dstHend;
+ int dstH= step->dstH;
+ int srcSliceY= step->srcSliceY;
+ int srcSliceH= step->srcSliceH;
- const enum AVPixelFormat dstFormat = c->dstFormat;
- const int flags = c->flags;
- int32_t *vLumFilterPos = c->vLumFilterPos;
- int32_t *vChrFilterPos = c->vChrFilterPos;
+ const int32_t *vLumFilterPos = c->vLumFilterPos;
+ const int32_t *vChrFilterPos = c->vChrFilterPos;
const int vLumFilterSize = c->vLumFilterSize;
const int vChrFilterSize = c->vChrFilterSize;
- yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
- yuv2planarX_fn yuv2planeX = c->yuv2planeX;
- yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
- yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
- yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
- yuv2packedX_fn yuv2packedX = c->yuv2packedX;
- yuv2anyX_fn yuv2anyX = c->yuv2anyX;
- const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
+ const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH = AV_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample);
- int should_dither = isNBPS(c->srcFormat) ||
+ const int should_dither = isNBPS(c->srcFormat) ||
is16BPS(c->srcFormat);
- int lastDstY;
/* vars which will change and which we need to store back in the context */
- int dstY = c->dstY;
int lumBufIndex = c->lumBufIndex;
int chrBufIndex = c->chrBufIndex;
int lastInLumBuf = c->lastInLumBuf;
int lastInChrBuf = c->lastInChrBuf;
-
- int lumStart = 0;
- int lumEnd = c->descIndex[0];
- int chrStart = lumEnd;
- int chrEnd = c->descIndex[1];
- int vStart = chrEnd;
- int vEnd = c->numDesc;
- SwsSlice *src_slice = &c->slice[lumStart];
+ const int lumStart = 0;
+ const int lumEnd = c->descIndex[0];
+ const int chrStart = lumEnd;
+ const int chrEnd = c->descIndex[1];
+ const int vStart = chrEnd;
+ const int vEnd = c->numDesc;
SwsSlice *hout_slice = &c->slice[c->numSlice-2];
- SwsSlice *vout_slice = &c->slice[c->numSlice-1];
SwsFilterDescriptor *desc = c->desc;
-
- int needAlpha = c->needAlpha;
-
int hasLumHoles = 1;
int hasChrHoles = 1;
+ int refreshBuff = 1;
- if (isPacked(c->srcFormat)) {
- src[0] =
- src[1] =
- src[2] =
- src[3] = src[0];
- srcStride[0] =
- srcStride[1] =
- srcStride[2] =
- srcStride[3] = srcStride[0];
- }
- srcStride[1] <<= c->vChrDrop;
- srcStride[2] <<= c->vChrDrop;
-
- DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
- src[0], srcStride[0], src[1], srcStride[1],
- src[2], srcStride[2], src[3], srcStride[3],
- dst[0], dstStride[0], dst[1], dstStride[1],
- dst[2], dstStride[2], dst[3], dstStride[3]);
- DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
- srcSliceY, srcSliceH, dstY, dstH);
- DEBUG_BUFFERS("vLumFilterSize: %d vChrFilterSize: %d\n",
- vLumFilterSize, vChrFilterSize);
-
- if (dstStride[0]&15 || dstStride[1]&15 ||
- dstStride[2]&15 || dstStride[3]&15) {
- static int warnedAlready = 0; // FIXME maybe move this into the context
- if (flags & SWS_PRINT_INFO && !warnedAlready) {
- av_log(c, AV_LOG_WARNING,
- "Warning: dstStride is not aligned!\n"
- " ->cannot do aligned memory accesses anymore\n");
- warnedAlready = 1;
- }
- }
-
- if ( (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15
- || (uintptr_t)src[0]&15 || (uintptr_t)src[1]&15 || (uintptr_t)src[2]&15
- || dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15
- || srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15
- ) {
- static int warnedAlready=0;
- int cpu_flags = av_get_cpu_flags();
- if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
- av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speed loss\n");
- warnedAlready=1;
- }
- }
-
- /* Note the user might start scaling the picture in the middle so this
- * will not get executed. This is not really intended but works
- * currently, so people might do it. */
- if (srcSliceY == 0) {
- lumBufIndex = -1;
- chrBufIndex = -1;
- dstY = 0;
- lastInLumBuf = -1;
- lastInChrBuf = -1;
- }
-
- if (!should_dither) {
- c->chrDither8 = c->lumDither8 = sws_pb_64;
- }
- lastDstY = dstY;
+ for (; dstY < dstHend; dstY++) {
- ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
- yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, c->use_mmx_vfilter);
-
- ff_init_slice_from_src(src_slice, (uint8_t**)src, srcStride, c->srcW,
- srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
-
- ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
- dstY, dstH, dstY >> c->chrDstVSubSample,
- AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
- if (srcSliceY == 0) {
- hout_slice->plane[0].sliceY = lastInLumBuf + 1;
- hout_slice->plane[1].sliceY = lastInChrBuf + 1;
- hout_slice->plane[2].sliceY = lastInChrBuf + 1;
- hout_slice->plane[3].sliceY = lastInLumBuf + 1;
-
- hout_slice->plane[0].sliceH =
- hout_slice->plane[1].sliceH =
- hout_slice->plane[2].sliceH =
- hout_slice->plane[3].sliceH = 0;
- hout_slice->width = dstW;
- }
-
- for (; dstY < dstH; dstY++) {
const int chrDstY = dstY >> c->chrDstVSubSample;
int use_mmx_vfilter= c->use_mmx_vfilter;
-
// First line needed as input
const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
@@ -395,9 +296,10 @@ static int swscale(SwsContext *c, const uint8_t *src[],
int posY, cPosY, firstPosY, lastPosY, firstCPosY, lastCPosY;
// handle holes (FAST_BILINEAR & weird filters)
- if (firstLumSrcY > lastInLumBuf) {
+ if (refreshBuff || firstLumSrcY > lastInLumBuf) {
hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
+
if (hasLumHoles) {
hout_slice->plane[0].sliceY = firstLumSrcY;
hout_slice->plane[3].sliceY = firstLumSrcY;
@@ -407,9 +309,10 @@ static int swscale(SwsContext *c, const uint8_t *src[],
lastInLumBuf = firstLumSrcY - 1;
}
- if (firstChrSrcY > lastInChrBuf) {
+ if (refreshBuff || firstChrSrcY > lastInChrBuf) {
hasChrHoles = lastInChrBuf != firstChrSrcY - 1;
+
if (hasChrHoles) {
hout_slice->plane[1].sliceY = firstChrSrcY;
hout_slice->plane[2].sliceY = firstChrSrcY;
@@ -420,6 +323,8 @@ static int swscale(SwsContext *c, const uint8_t *src[],
lastInChrBuf = firstChrSrcY - 1;
}
+ refreshBuff = 0;
+
DEBUG_BUFFERS("dstY: %d\n", dstY);
DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
firstLumSrcY, lastLumSrcY, lastInLumBuf);
@@ -440,8 +345,8 @@ static int swscale(SwsContext *c, const uint8_t *src[],
av_assert0((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->plane[0].available_lines);
av_assert0((lastChrSrcY - firstChrSrcY + 1) <= hout_slice->plane[1].available_lines);
-
posY = hout_slice->plane[0].sliceY + hout_slice->plane[0].sliceH;
+
if (posY <= lastLumSrcY && !hasLumHoles) {
firstPosY = FFMAX(firstLumSrcY, posY);
lastPosY = FFMIN(firstLumSrcY + hout_slice->plane[0].available_lines - 1, srcSliceY + srcSliceH - 1);
@@ -496,11 +401,21 @@ static int swscale(SwsContext *c, const uint8_t *src[],
if (dstY >= dstH - 2) {
/* hmm looks like we can't use MMX here without overwriting
* this array's tail */
+
+ yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
+ yuv2planarX_fn yuv2planeX = c->yuv2planeX;
+ yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
+ yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
+ yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
+ yuv2packedX_fn yuv2packedX = c->yuv2packedX;
+ yuv2anyX_fn yuv2anyX = c->yuv2anyX;
+
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
&yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
use_mmx_vfilter= 0;
ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, use_mmx_vfilter);
+
}
{
@@ -508,6 +423,252 @@ static int swscale(SwsContext *c, const uint8_t *src[],
desc[i].process(c, &desc[i], dstY, 1);
}
}
+
+ /* store changed local vars back in the context */
+ c->dstY = dstY;
+ c->lumBufIndex = lumBufIndex;
+ c->chrBufIndex = chrBufIndex;
+ c->lastInLumBuf = lastInLumBuf;
+ c->lastInChrBuf = lastInChrBuf;
+}
+
+#if HAVE_THREADS
+/**
+ * Lazily set up the per-thread working copies of the scaler context.
+ *
+ * On the first call every worker context in c->threads_ctx is overwritten
+ * with a byte copy of c, linked back to c through its parent field, given
+ * its own filter chain by ff_init_filters() and bound to swscale_step().
+ * Later calls return immediately.
+ *
+ * @param c parent scaler context
+ * @return 0 on success or when there is nothing to do, otherwise the first
+ *         negative error code returned by ff_init_filters()
+ */
+static int swscale_threads_prepare(SwsContext *c)
+{
+    int ret = 0;
+    int i;
+
+    if (c->is_threads_prepared)
+        return 0;
+
+    /* The flag is raised up front, so the setup below is attempted only
+     * once, even if part of it fails. */
+    c->is_threads_prepared = 1;
+
+    if (!c->threads_ctx)
+        return 0;
+
+    for (i = 0; i < c->sw_nbthreads; ++i) {
+        struct SwsContextThread *ctx = &c->threads_ctx[i];
+        int err;
+
+        memcpy(ctx->func_ctx, c, sizeof(SwsContext));
+        ctx->func_ctx->parent = c;
+        ctx->func_pfn = swscale_step;
+
+        /* Keep going after a failure so that every worker context is at
+         * least populated, but remember the first error for the caller
+         * instead of silently reporting success. */
+        err = ff_init_filters(ctx->func_ctx);
+        if (err < 0 && ret >= 0)
+            ret = err;
+    }
+
+    return ret;
+}
+#endif
+
+
+static int swscale(SwsContext *c, const uint8_t *src[],
+ int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t *dst[], int dstStride[])
+{
+ /* load a few things into local vars to make the code more readable?
+ * and faster */
+ const int dstW = c->dstW;
+ const int dstH = c->dstH;
+
+ const enum AVPixelFormat dstFormat = c->dstFormat;
+ const int flags = c->flags;
+
+ const int vLumFilterSize = c->vLumFilterSize;
+ const int vChrFilterSize = c->vChrFilterSize;
+
+ yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
+ yuv2planarX_fn yuv2planeX = c->yuv2planeX;
+ yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
+ yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
+ yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
+ yuv2packedX_fn yuv2packedX = c->yuv2packedX;
+ yuv2anyX_fn yuv2anyX = c->yuv2anyX;
+ const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
+ const int chrSrcSliceH = AV_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample);
+ int should_dither = isNBPS(c->srcFormat) ||
+ is16BPS(c->srcFormat);
+ int lastDstY;
+
+ /* vars which will change and which we need to store back in the context */
+ int dstY = c->dstY;
+ int lastInLumBuf = c->lastInLumBuf;
+ int lastInChrBuf = c->lastInChrBuf;
+
+
+ int lumStart = 0;
+
+ SwsSlice *src_slice = &c->slice[lumStart];
+ SwsSlice *hout_slice = &c->slice[c->numSlice-2];
+ SwsSlice *vout_slice = &c->slice[c->numSlice-1];
+
+ int needAlpha = c->needAlpha;
+ SwsContextStep *step;
+ int last_chunk;
+
+#if HAVE_THREADS
+ int nbthreads = c->sw_nbthreads;
+ int left_lines;
+ int lines_per_thread = 0;
+ struct SwsContextThread *ctx;
+#endif
+
+ if (isPacked(c->srcFormat)) {
+ src[0] =
+ src[1] =
+ src[2] =
+ src[3] = src[0];
+ srcStride[0] =
+ srcStride[1] =
+ srcStride[2] =
+ srcStride[3] = srcStride[0];
+ }
+ srcStride[1] <<= c->vChrDrop;
+ srcStride[2] <<= c->vChrDrop;
+
+ DEBUG_BUFFERS("swscale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
+ src[0], srcStride[0], src[1], srcStride[1],
+ src[2], srcStride[2], src[3], srcStride[3],
+ dst[0], dstStride[0], dst[1], dstStride[1],
+ dst[2], dstStride[2], dst[3], dstStride[3]);
+ DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
+ srcSliceY, srcSliceH, dstY, dstH);
+ DEBUG_BUFFERS("vLumFilterSize: %d vChrFilterSize: %d\n",
+ vLumFilterSize, vChrFilterSize);
+
+ if (dstStride[0]&15 || dstStride[1]&15 ||
+ dstStride[2]&15 || dstStride[3]&15) {
+ static int warnedAlready = 0; // FIXME maybe move this into the context
+ if (flags & SWS_PRINT_INFO && !warnedAlready) {
+ av_log(c, AV_LOG_WARNING,
+ "Warning: dstStride is not aligned!\n"
+ " ->cannot do aligned memory accesses anymore\n");
+ warnedAlready = 1;
+ }
+ }
+
+ if ( (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15
+ || (uintptr_t)src[0]&15 || (uintptr_t)src[1]&15 || (uintptr_t)src[2]&15
+ || dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15
+ || srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15
+ ) {
+ static int warnedAlready=0;
+ int cpu_flags = av_get_cpu_flags();
+ if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
+ av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speed loss\n");
+ warnedAlready=1;
+ }
+ }
+
+ /* Note the user might start scaling the picture in the middle so this
+ * will not get executed. This is not really intended but works
+ * currently, so people might do it. */
+ if (srcSliceY == 0) {
+ dstY = 0;
+ lastInLumBuf = -1;
+ lastInChrBuf = -1;
+ }
+
+ if (!should_dither) {
+ c->chrDither8 = c->lumDither8 = sws_pb_64;
+ }
+ lastDstY = dstY;
+
+ ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
+ yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, c->use_mmx_vfilter);
+
+ ff_init_slice_from_src(src_slice, (uint8_t**)src, srcStride, c->srcW,
+ srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
+
+ ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
+ dstY, dstH, dstY >> c->chrDstVSubSample,
+ AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
+ if (srcSliceY == 0) {
+ hout_slice->plane[0].sliceY = lastInLumBuf + 1;
+ hout_slice->plane[1].sliceY = lastInChrBuf + 1;
+ hout_slice->plane[2].sliceY = lastInChrBuf + 1;
+ hout_slice->plane[3].sliceY = lastInLumBuf + 1;
+
+ hout_slice->plane[0].sliceH =
+ hout_slice->plane[1].sliceH =
+ hout_slice->plane[2].sliceH =
+ hout_slice->plane[3].sliceH = 0;
+ hout_slice->width = dstW;
+ }
+
+ last_chunk = dstH - dstY;
+
+#if HAVE_THREADS
+ left_lines = last_chunk;
+
+ if (nbthreads > 1 && c->threads_ctx) {
+ int slice_round = 64;
+
+ /* Reserve the last two lines for the calling thread. */
+ last_chunk = 2;
+ left_lines = left_lines - last_chunk;
+ lines_per_thread = (left_lines + nbthreads -1)/nbthreads;
+
+ if (lines_per_thread < slice_round)
+ lines_per_thread = slice_round;
+ else if (lines_per_thread & (slice_round - 1))
+ lines_per_thread += slice_round - (lines_per_thread & (slice_round - 1));
+
+ if (lines_per_thread > left_lines)
+ lines_per_thread = left_lines;
+
+ nbthreads = (left_lines + lines_per_thread -1)/lines_per_thread;
+ } else {
+ nbthreads = 0;
+ }
+
+ swscale_threads_prepare(c);
+
+ for (int s = 0; s < nbthreads; s++) {
+ int chunk = lines_per_thread;
+ if (chunk > left_lines) {
+ chunk = left_lines;
+ /* Use current thread to calc last part. */
+ last_chunk += left_lines;
+ break;
+ }
+
+ left_lines -= chunk;
+ if (chunk <= 0)
+ break;
+
+ ctx = &c->threads_ctx[s];
+ step = &ctx->func_ctx->step_param;
+
+ step->dstY= dstY + s * lines_per_thread;
+ step->dstHend = dstY + s * lines_per_thread + chunk;
+ step->dstH = dstH;
+ step->srcSliceY = srcSliceY;
+ step->srcSliceH = srcSliceH;
+
+ pthread_mutex_lock(&ctx->process_mutex);
+ ctx->t_work = 1;
+ pthread_cond_signal(&ctx->process_cond);
+ pthread_mutex_unlock(&ctx->process_mutex);
+ }
+
+#endif
+
+ /*
+ * Process the last (or all) lines of the slice in the calling thread
+ * so that the original SwsContext structure is brought up to date.
+ */
+ step = &c->step_param;
+ step->dstY= dstH - last_chunk;
+ step->dstHend = dstH;
+ step->dstH = dstH;
+ step->srcSliceY = srcSliceY;
+ step->srcSliceH = srcSliceH;
+ swscale_step(c);
+
+ dstY = c->dstY;
+
+#if HAVE_THREADS
+ swscale_thread_wait_finish(c);
+#endif
+
+
if (isPlanar(dstFormat) && isALPHA(dstFormat) && !needAlpha) {
int length = dstW;
int height = dstY - lastDstY;
@@ -527,13 +688,6 @@ static int swscale(SwsContext *c, const uint8_t *src[],
#endif
emms_c();
- /* store changed local vars back in the context */
- c->dstY = dstY;
- c->lumBufIndex = lumBufIndex;
- c->chrBufIndex = chrBufIndex;
- c->lastInLumBuf = lastInLumBuf;
- c->lastInChrBuf = lastInChrBuf;
-
return dstY - lastDstY;
}
@@ -275,6 +275,17 @@ typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter,
struct SwsSlice;
struct SwsFilterDescriptor;
+#if HAVE_THREADS
+struct SwsContextThread;
+#endif
+
+typedef struct SwsContextStep { /* range of work consumed by one swscale_step() call */
+ int dstY; ///< first destination line to process
+ int dstHend; ///< end of the destination line range (exclusive)
+ int dstH; ///< destination height; swscale() passes its dstH (c->dstH) here
+ int srcSliceY; ///< srcSliceY argument of swscale()
+ int srcSliceH; ///< srcSliceH argument of swscale()
+} SwsContextStep;
/* This struct should be aligned on at least a 32-byte boundary. */
typedef struct SwsContext {
@@ -625,9 +636,44 @@ typedef struct SwsContext {
SwsDither dither;
SwsAlphaBlend alphablend;
+
+ /*
+ * Set to the original context when this is a per-thread copy of an SwsContext used for multithreading.
+ */
+ struct SwsContext *parent;
+
+ /*
+ * Parameters for the next swscale_step() call.
+ */
+ SwsContextStep step_param;
+
+#if HAVE_THREADS
+ int is_threads_prepared;
+ int sw_nbthreads; // Number of threads to use for scaling
+ struct SwsContextThread *threads_ctx;
+
+#endif
+
} SwsContext;
//FIXME check init (where 0)
+#if HAVE_THREADS
+struct SwsContextThread { /* state of one scaler worker thread */
+ void (*func_pfn)(SwsContext *c); ///< job run by the worker; set to swscale_step()
+ SwsContext *func_ctx; ///< worker's own SwsContext, passed to func_pfn
+
+ pthread_t f_thread; ///< handle of the worker thread
+ pthread_cond_t process_cond; ///< signalled to wake the worker (new job or shutdown)
+ pthread_cond_t finish_cond; ///< signalled by the worker when a job is done
+ pthread_mutex_t process_mutex; ///< held while posting a job or requesting shutdown
+ pthread_mutex_t finish_mutex; ///< held while clearing or waiting on t_work
+ volatile int t_work; ///< set to 1 when a job is posted, cleared by the worker when done
+ volatile int t_end; ///< set to 1 to make the worker exit
+};
+
+void swscale_thread_wait_finish(struct SwsContext *c);
+#endif
+
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c);
int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
int fullRange, int brightness,
@@ -27,6 +27,7 @@
#include <math.h>
#include <stdio.h>
#include <string.h>
+#include <pthread.h>
#if HAVE_MMAP
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
@@ -1156,6 +1157,144 @@ static enum AVPixelFormat alphaless_fmt(enum AVPixelFormat fmt)
}
}
+
+#if HAVE_THREADS
+/**
+ * Entry point of a scaler worker thread.
+ *
+ * The thread sleeps on process_cond until a job is posted (t_work set) or
+ * shutdown is requested (t_end set). For a job it calls func_pfn on
+ * func_ctx, then clears t_work and signals finish_cond. It leaves the loop
+ * as soon as it observes t_end.
+ *
+ * @param arg pointer to the struct SwsContextThread describing this worker
+ * @return always NULL
+ */
+static void *swscale_thread(void *arg)
+{
+    struct SwsContextThread *ctx = arg;
+
+    for (;;) {
+        int stop;
+
+        pthread_mutex_lock(&ctx->process_mutex);
+        while (ctx->t_work == 0 && !ctx->t_end)
+            pthread_cond_wait(&ctx->process_cond, &ctx->process_mutex);
+        /* t_end is written under process_mutex, so sample it while the
+         * lock is still held rather than reading it unprotected later. */
+        stop = ctx->t_end;
+        pthread_mutex_unlock(&ctx->process_mutex);
+
+        if (stop)
+            break;
+
+        ctx->func_pfn(ctx->func_ctx);
+
+        pthread_mutex_lock(&ctx->finish_mutex);
+        ctx->t_work = 0;
+        pthread_cond_signal(&ctx->finish_cond);
+        pthread_mutex_unlock(&ctx->finish_mutex);
+    }
+
+    return NULL;
+}
+
+/**
+ * Stop the scaler worker threads of c and release everything reachable
+ * through c->threads_ctx. Does nothing when no worker state is allocated.
+ *
+ * @param c scaler context whose worker state is torn down
+ */
+static void swscale_thread_deinit(SwsContext *c)
+{
+    struct SwsContextThread *workers = c->threads_ctx;
+    const int nb_workers = c->sw_nbthreads;
+    int n;
+
+    if (!workers)
+        return;
+
+    /* Ask every worker to leave its loop. */
+    for (n = 0; n < nb_workers; n++) {
+        struct SwsContextThread *w = &workers[n];
+
+        pthread_mutex_lock(&w->process_mutex);
+        w->t_end = 1;
+        pthread_cond_signal(&w->process_cond);
+        pthread_mutex_unlock(&w->process_mutex);
+    }
+
+    /* Join only the entries whose thread handle is non-zero. */
+    for (n = 0; n < nb_workers; n++) {
+        if (!workers[n].f_thread)
+            continue;
+        pthread_join(workers[n].f_thread, NULL);
+    }
+
+    /* Tear down the synchronisation objects and the private contexts. */
+    for (n = 0; n < nb_workers; n++) {
+        struct SwsContextThread *w = &workers[n];
+
+        pthread_mutex_destroy(&w->process_mutex);
+        pthread_mutex_destroy(&w->finish_mutex);
+        pthread_cond_destroy(&w->process_cond);
+        pthread_cond_destroy(&w->finish_cond);
+
+        if (!w->func_ctx)
+            continue;
+        ff_free_filters(w->func_ctx);
+        av_freep(&w->func_ctx);
+    }
+
+    av_freep(&c->threads_ctx);
+}
+
+/**
+ * Allocate the worker-thread state of a scaler context and start the
+ * worker threads.
+ *
+ * One SwsContextThread entry is created per c->sw_nbthreads: its mutexes
+ * and condition variables are initialised, a fresh SwsContext is allocated
+ * for it with sws_alloc_context(), and a thread running swscale_thread() is
+ * started on it.
+ *
+ * @param c scaler context; c->sw_nbthreads gives the number of workers
+ * @return 0 on success, a negative AVERROR code on failure, in which case
+ *         swscale_thread_deinit() has already released the partial state
+ */
+static int swscale_thread_init(SwsContext *c)
+{
+    struct SwsContextThread *ctx;
+    int ret = 0;
+    int i;
+
+    c->threads_ctx = av_mallocz(c->sw_nbthreads * sizeof(*c->threads_ctx));
+    if (!c->threads_ctx) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    c->is_threads_prepared = 0;
+
+    for (i = 0; i < c->sw_nbthreads; ++i) {
+        ctx = &c->threads_ctx[i];
+        ctx->t_work = 0;
+        ctx->t_end = 0;
+        pthread_mutex_init(&ctx->process_mutex, NULL);
+        pthread_mutex_init(&ctx->finish_mutex, NULL);
+        pthread_cond_init(&ctx->process_cond, NULL);
+        pthread_cond_init(&ctx->finish_cond, NULL);
+    }
+
+    for (i = 0; i < c->sw_nbthreads; ++i) {
+        ctx = &c->threads_ctx[i];
+
+        ctx->func_ctx = sws_alloc_context();
+        if (!ctx->func_ctx) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+
+    for (i = 0; i < c->sw_nbthreads; ++i) {
+        ctx = &c->threads_ctx[i];
+
+        ret = pthread_create(&ctx->f_thread, NULL, swscale_thread, ctx);
+        if (ret) {
+            /* The handle is undefined after a failed pthread_create();
+             * zero it so that swscale_thread_deinit(), which joins only
+             * non-zero handles, skips this entry. */
+            memset(&ctx->f_thread, 0, sizeof(ctx->f_thread));
+            ret = AVERROR(ret);
+            goto fail;
+        }
+    }
+
+fail:
+    if (ret)
+        swscale_thread_deinit(c);
+
+    return ret;
+}
+
+/**
+ * Block until every worker thread of c has cleared its t_work flag, i.e.
+ * has finished the job it was given.
+ *
+ * Returns immediately when c has no worker threads configured or no worker
+ * state allocated.
+ *
+ * @param c scaler context owning the worker threads
+ */
+void swscale_thread_wait_finish(SwsContext *c)
+{
+    int i;
+
+    /* threads_ctx may be unset even if sw_nbthreads is not; the other
+     * users of threads_ctx check it for NULL as well. */
+    if (!c->sw_nbthreads || !c->threads_ctx)
+        return;
+
+    for (i = 0; i < c->sw_nbthreads; i++) {
+        struct SwsContextThread *ctx = &c->threads_ctx[i];
+
+        pthread_mutex_lock(&ctx->finish_mutex);
+        while (ctx->t_work != 0)
+            pthread_cond_wait(&ctx->finish_cond, &ctx->finish_mutex);
+        pthread_mutex_unlock(&ctx->finish_mutex);
+    }
+}
+
+#endif
+
av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
{
@@ -1823,7 +1962,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
}
c->swscale = ff_getSwsFunc(c);
- return ff_init_filters(c);
+
+ ret = ff_init_filters(c);
+
+#if HAVE_THREADS
+ ret = swscale_thread_init(c);
+#endif
+
+ return ret;
fail: // FIXME replace things by appropriate error codes
if (ret == RETCODE_USE_CASCADE) {
int tmpW = sqrt(srcW * (int64_t)dstW);
@@ -2308,6 +2454,10 @@ void sws_freeContext(SwsContext *c)
if (!c)
return;
+#if HAVE_THREADS
+ swscale_thread_deinit(c);
+#endif
+
for (i = 0; i < 4; i++)
av_freep(&c->dither_error[i]);