@@ -4,6 +4,7 @@ releases are sorted from youngest to oldest.
version <next>:
- ADPCM IMA Westwood encoder
- Westwood AUD muxer
+- Intel IPP accelerated video scaling filter
version 4.4:
@@ -240,6 +240,7 @@ External library support:
--enable-libgsm enable GSM de/encoding via libgsm [no]
--enable-libiec61883 enable iec61883 via libiec61883 [no]
--enable-libilbc enable iLBC de/encoding via libilbc [no]
+ --enable-libipp enable Intel IPP library based scaling [no]
--enable-libjack enable JACK audio sound server [no]
--enable-libklvanc enable Kernel Labs VANC processing [no]
--enable-libkvazaar enable HEVC encoding via libkvazaar [no]
@@ -1766,6 +1767,7 @@ EXTERNAL_LIBRARY_NONFREE_LIST="
libfdk_aac
openssl
libtls
+ libipp
"
EXTERNAL_LIBRARY_VERSION3_LIST="
@@ -3643,6 +3645,7 @@ rubberband_filter_deps="librubberband"
sab_filter_deps="gpl swscale"
scale2ref_filter_deps="swscale"
scale_filter_deps="swscale"
+scale_ipp_filter_deps="libipp"
scale_qsv_filter_deps="libmfx"
scdet_filter_select="scene_sad"
select_filter_select="scene_sad"
@@ -6412,6 +6415,17 @@ if enabled libmfx; then
check_cc MFX_CODEC_VP9 "mfx/mfxvp9.h mfx/mfxstructures.h" "MFX_CODEC_VP9"
fi
+if enabled libipp; then # probe for Intel IPP by linking a test program that references ippInit
+ ipp_header_for_check='ippcore.h'
+ case $target_os in
+ mingw32*|mingw64*) # on MinGW targets _mingw.h is listed ahead of ippcore.h for the check
+ ipp_header_for_check='_mingw.h ippcore.h'
+ ;;
+ esac
+ check_lib libipp "$ipp_header_for_check" ippInit -Wl,--start-group -lippi -lipps -lippcore -lippvm -Wl,--end-group ||
+ die "ERROR: Intel IPP not found"
+fi
+
enabled libmodplug && require_pkg_config libmodplug libmodplug libmodplug/modplug.h ModPlug_Load
enabled libmp3lame && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame $libm_extralibs
enabled libmysofa && { check_pkg_config libmysofa libmysofa mysofa.h mysofa_neighborhood_init_withstepdefine ||
@@ -6490,7 +6504,6 @@ enabled libvpx && {
die "libvpx enabled but no supported decoders found"
fi
}
-
enabled libwebp && {
enabled libwebp_encoder && require_pkg_config libwebp "libwebp >= 0.2.0" webp/encode.h WebPGetEncoderVersion
enabled libwebp_anim_encoder && check_pkg_config libwebp_anim_encoder "libwebpmux >= 0.4.0" webp/mux.h WebPAnimEncoderOptionsInit; }
@@ -17706,6 +17706,54 @@ If the specified expression is not valid, it is kept at its current
value.
@end table
+@section scale_ipp
+
+Use the Intel Integrated Performance Primitives library (libipp) to perform x86-optimized frame scaling.
+Setting the output width, height and the output display aspect ratio
+works in the same way as for the @ref{scale} filter.
+
+The filter supports only the YUV420 (AV_PIX_FMT_YUV420P) and YUV420p10 (AV_PIX_FMT_YUV420P10LE) image formats;
+no input-output format conversion is provided.
+
+Scaling of interlaced images is not supported.
+
+The following additional options are accepted:
+@table @option
+
+@item interpolation, flags
+The interpolation algorithm used for resizing. One of the following:
+@table @option
+@item nn
+Nearest neighbour.
+
+@item linear
+@item cubic
+2-parameter cubic (B=0, C=1/2)
+
+@item super
+Supersampling (can be used for downscaling only).
+This is the default interpolation.
+
+@item lanczos
+@end table
+
+@item ipp_antialiasing
+Enables internal IPP anti-aliasing (@code{0} by default).
+Smooths jagged edges, but decreases performance.
+Can be used with the linear, cubic and lanczos interpolation algorithms only.
+
+@item ipp_threading
+Enables multithreading in the IPP scaling filter (@code{1} by default).
+By default the number of threads is selected automatically based on the number of logical processors available,
+the output image size and the interpolation used for scaling.
+Set @var{ipp_threading} to @code{0} to switch IPP scaling filter threading off.
+
+@item threads
+The maximum number of threads allowed to execute the IPP scaling filter. The actual number of threads used is equal to
+ @var{threads} or, in some cases, lower, depending on the number of logical processors available,
+the output image size and the interpolation used for scaling.
+@end table
+
@section scale_npp
Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel
@@ -393,6 +393,7 @@ OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o
OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \
vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o
+OBJS-$(CONFIG_SCALE_IPP_FILTER) += vf_scale_ipp.o scale_eval.o
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o
OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o
@@ -375,6 +375,7 @@ extern const AVFilter ff_vf_rotate;
extern const AVFilter ff_vf_sab;
extern const AVFilter ff_vf_scale;
extern const AVFilter ff_vf_scale_cuda;
+extern const AVFilter ff_vf_scale_ipp;
extern const AVFilter ff_vf_scale_npp;
extern const AVFilter ff_vf_scale_qsv;
extern const AVFilter ff_vf_scale_vaapi;
@@ -30,8 +30,8 @@
#include "libavutil/version.h"
#define LIBAVFILTER_VERSION_MAJOR 8
-#define LIBAVFILTER_VERSION_MINOR 0
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MINOR 1
+#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
new file mode 100644
@@ -0,0 +1,1247 @@
+/*
+ * Copyright (c) 2021 Intel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Intel IPP library based x86 optimized scale video filter.
+ * Uses SIMD instructions up to AVX512 for suitable CPUs
+ * and multithreading for scaling optimization
+ */
+#include <stdio.h>
+#include <string.h>
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/x86/cpu.h"
+#include "scale_eval.h"
+#include "video.h"
+
+#include "ippcore.h"
+#include "ippi.h"
+#include "ipps.h"
+
+
+static const char *const var_names[] = {"in_w", "iw", "in_h", "ih", // NULL-terminated list of expression variable names
+ "out_w", "ow", "out_h", "oh",
+ "a", // antialiasing -- NOTE(review): the scale filter documents "a" as iw / ih; confirm which meaning applies here
+ NULL};
+
+enum var_name { // indices into IPPScaleContext.var_values[], listed in the same order as var_names[]
+ VAR_IN_W,
+ VAR_IW,
+ VAR_IN_H,
+ VAR_IH,
+ VAR_OUT_W,
+ VAR_OW,
+ VAR_OUT_H,
+ VAR_OH,
+ VAR_A,
+ VARS_NB // number of variables; sizes var_values[]
+};
+
+struct IPPparallelResizeInfo;
+struct IPPScaleContext;
+// Wrapper that resizes all three planes of a YUV420 frame (ipp_resize_yuv420_l / ipp_resize_yuv420_border_l are assigned to it)
+typedef IppStatus (*ippResize_YUV420_ptr)(struct IPPScaleContext *scale, const Ipp8u* src_yuv[], int* src_step, Ipp8u* dst_yuv[], int* dst_step);
+// Single-plane ippi resize entry point without border arguments (assigned for super sampling)
+typedef IppStatus (IPP_STDCALL *ippiResize_C1R_L_ptr)(const Ipp8u* src_yuv, IppSizeL src_step, Ipp8u* dst_yuv, IppSizeL dst_step, IppiPointL dstOffset, IppiSizeL dst_size, const IppiResizeSpec* ipp_spec, Ipp8u* pbuffer_y);
+typedef IppStatus (IPP_STDCALL *ippiResize_border_C1R_L_ptr)(const Ipp8u* src_yuv, IppSizeL src_step, Ipp8u* dst_yuv, IppSizeL dst_step, IppiPointL dstOffset, IppiSizeL dst_size, IppiBorderType border, const Ipp8u* pBorderValue, const IppiResizeSpec* ipp_spec, Ipp8u* pbuffer_y); // same, with border type and border value arguments
+
+
+typedef IppStatus(*functype_l)(IppSizeL i, void* arg); // NOTE(review): not referenced in the visible part of the file -- confirm it is needed
+
+typedef struct
+{
+ IppiPointL split; /* number of tiles along x and along y */
+ IppiSizeL tile_size; /* size of a regular tile */
+ IppiSizeL last_tile_size; /* size recorded for the last tile (regular size plus remainder) */
+ IppSizeL tile_buffer_size; /* work buffer size for a regular tile, rounded up to a multiple of 64 */
+ IppSizeL last_tile_buffer_size; /* work buffer size for the last tile, rounded up to a multiple of 64 */
+ IppSizeL num_tiles; /* number of tiles, and of per-tile work buffers, for one plane */
+} IppResizeInfo_LT;
+
+static const enum AVPixelFormat supported_formats[] = { /* AV_PIX_FMT_NONE-terminated list */
+ AV_PIX_FMT_YUV420P, /* handled with the ipp8u data type */
+ AV_PIX_FMT_YUV420P10LE, /* handled with the ipp16u data type */
+ AV_PIX_FMT_NONE
+};
+
+typedef struct IPPScaleContext {
+ const AVClass *class;
+ AVDictionary *opts;
+
+ /*
+ * New dimensions. Special values are:
+ * 0 = original width/height
+ * -1 = keep original aspect
+ * -N = try to keep aspect but make sure it is divisible by N
+ */
+ int w, h;
+ char *size_str;
+ unsigned int flags; ///< scaling flags
+
+ int interlaced; // not in use currently
+
+ char *w_expr; ///< width expression string
+ char *h_expr; ///< height expression string
+ AVExpr *w_pexpr;
+ AVExpr *h_pexpr;
+ double var_values[VARS_NB]; ///< one slot per enum var_name entry
+
+ char *flags_str;
+
+ int force_original_aspect_ratio;
+ int force_divisible_by;
+
+ int interpolation; ///< compared against ippSuper / ippLinear / ippLanczos / ippCubic when the resizer is initialized
+ int ipp_antialiasing; ///< 1 selects the IPP antialiasing init and resize functions
+
+ int ipp_threading; ///< 1 enables tiled multithreading; reset to 0 when the filter context has a single thread
+ Ipp32u ipp_threads; ///< thread count taken from ff_filter_get_nb_threads() or av_cpu_count()
+ int ipp_data_type; //ipp8u or ipp16u (ipp16u is chosen for AV_PIX_FMT_YUV420P10LE input)
+
+ //ipp library functions pointers to switch between interpolation types and 8/16 bits data
+ ippResize_YUV420_ptr ipp_resize_yuv420_func; ///< whole-frame (single-threaded) resize wrapper
+ ippiResize_C1R_L_ptr ippi_resize_c1r_func; ///< plane resize without border arguments
+ ippiResize_border_C1R_L_ptr ippi_resize_border_c1r_func; ///< plane resize with border arguments
+
+ avfilter_action_func *ippi_resize_y420_lt_ptr; ///< per-tile job starting with the luma tiles
+ avfilter_action_func *ippi_resize_uv420_lt_ptr; ///< per-tile job for the chroma planes
+
+ IppiResizeSpec *ipp_spec; ///< resize spec of the Y plane
+ IppiResizeSpec *ipp_spec_uv; //2 times smaller components (one spec shared by U and V)
+
+ Ipp8u* src_yuv[3]; //all components source ptr
+ Ipp8u* dst_yuv[3]; //all components dst ptr
+ int src_step[3]; ///< row steps of the source planes, used as byte offsets
+ int dst_step[3]; ///< row steps of the destination planes, used as byte offsets
+
+ IppiSizeL src_size;
+ IppiSizeL src_size_uv; ///< input width and height shifted right by one
+ IppiSizeL dst_size;
+ IppiSizeL dst_size_uv; ///< output width and height shifted right by one
+ IppResizeInfo_LT tile_resize_info; ///< tiling of the Y plane
+ IppResizeInfo_LT tile_resize_info_uv; ///< tiling shared by the U and V planes
+
+ unsigned char *init_buffer; ///< temporary buffer handed to the IPP *Init_L functions
+ unsigned char **pbuffer_y; ///< per-tile work buffers for Y; entry 0 serves all planes in the single-threaded path
+ unsigned char **pbuffer_u; ///< per-tile work buffers for U
+ unsigned char **pbuffer_v; ///< per-tile work buffers for V
+} IPPScaleContext;
+
+AVFilter ff_vf_scale_ipp; /* NOTE(review): allfilters.c declares this symbol as "extern const AVFilter" -- confirm the qualifiers of the definition match */
+
+#define IPP_ALIGNED_SIZE(size, align) (((size)+(align)-1)&~((align)-1)) /* rounds size up to a multiple of align; align must be a power of two */
+/* Heuristic constants for image tiling: per-interpolation pixel counts (presumably the area below which a plane is not split -- confirm at the call site) */
+#define RESIZE_NEAREST_MIN_PIX_NUMBER (256 * 128)
+#define RESIZE_LINEAR_MIN_PIX_NUMBER (256 * 224)
+#define RESIZE_CUBIC_MIN_PIX_NUMBER (256 * 224)
+#define RESIZE_LANCZOS_MIN_PIX_NUMBER RESIZE_CUBIC_MIN_PIX_NUMBER
+#define RESIZE_SUPER_MIN_PIX_NUMBER (256 * 192)
+
+static const IppiSizeL RESIZE_NEAREST_MIN_TILE_SIZE = { 1,1 }; /* per-interpolation minimum tile extents, { width, height } */
+static const IppiSizeL RESIZE_LINEAR_MIN_TILE_SIZE = { 8,8 };
+static const IppiSizeL RESIZE_CUBIC_MIN_TILE_SIZE = { 16,16 };
+static const IppiSizeL RESIZE_LANCZOS_MIN_TILE_SIZE = { 24,24 };
+static const IppiSizeL RESIZE_SUPER_MIN_TILE_SIZE = { 16,16 };
+
+static IppStatus ipp_resize_get_tilebuffer_size(IppiResizeSpec *ipp_spec, IppResizeInfo_LT* tile_resize_info)
+{
+    /* Computes the work buffer sizes (rounded up to a multiple of 64) for a regular
+     * and for the last tile; the first failing IPP status is returned, not overwritten. */
+    IppStatus status;
+    IppSizeL s0 = 0, s1 = 0;
+    IppiSizeL regular = tile_resize_info->tile_size;
+
+    /* 2D split: the last column/row may be larger, so size for the maximum of both */
+    if ((tile_resize_info->split.y > 1) && (tile_resize_info->split.x > 1)) {
+        regular.width  = IPP_MAX(regular.width,  tile_resize_info->last_tile_size.width);
+        regular.height = IPP_MAX(regular.height, tile_resize_info->last_tile_size.height);
+    }
+    status = ippiResizeGetBufferSize_L(ipp_spec, regular, 1, &s0);
+    if (status >= ippStsNoErr) /* keep the error of the first query instead of overwriting it */
+        status = ippiResizeGetBufferSize_L(ipp_spec, tile_resize_info->last_tile_size, 1, &s1);
+
+    tile_resize_info->tile_buffer_size = IPP_ALIGNED_SIZE(s0, 64);
+    tile_resize_info->last_tile_buffer_size = IPP_ALIGNED_SIZE(s1, 64);
+    return status;
+}
+
+static void split(IppSizeL splitSize, IppSizeL multiplier, IppSizeL threshold, IppSizeL *tileLength, IppSizeL *residual, IppSizeL *num_tiles)
+{
+    /* Increases *num_tiles until a tile (length * multiplier) no longer exceeds
+     * threshold or one more tile would be empty; the outputs stay untouched if it already fits. */
+    IppSizeL len  = *tileLength;
+    IppSizeL rest = *residual;
+    IppSizeL count;
+    if (len * multiplier <= threshold)
+        return;
+    count = *num_tiles;
+    do {
+        if (splitSize / (count + 1) == 0)
+            break;
+        count++;
+        len  = splitSize / count;
+        rest = splitSize % count;
+    } while (len * multiplier > threshold);
+    /* a remainder longer than one tile is turned into additional tiles */
+    if (rest > len) {
+        count += rest / len;
+        len    = splitSize / count;
+        rest   = splitSize % count;
+    }
+    *tileLength = len;
+    *residual   = rest;
+    *num_tiles  = count;
+}
+
+/**
+ * Choose the tile size used to distribute one plane over numThreads threads.
+ *
+ * A ROI of at most min_item_number pixels, or numThreads == 1, results in a
+ * single tile covering the whole ROI. Otherwise the ROI is cut into rows and
+ * then into columns with split(); each pass aims at an equal pixel share per
+ * thread, bounded below by min_item_number and by the minTileSize extent.
+ * The result is written to *pTileSize.
+ */
+static void get_tilesize_simple(IppiSizeL roiSize, IppSizeL min_item_number, IppiSizeL minTileSize, IppiSizeL *pTileSize, Ipp32u numThreads)
+{
+    IppiSizeL tile = roiSize;
+    IppiSizeL rest = { 0 };
+    IppSizeL n_cols = 1, n_rows = 1;
+    IppSizeL target;
+
+    if (roiSize.width * roiSize.height > min_item_number && numThreads != 1) {
+        /* first pass: cut the ROI into horizontal bands */
+        target = tile.width * tile.height / numThreads;
+        target = IPP_MAX(min_item_number, target);
+        target = IPP_MAX(tile.width * minTileSize.height, target);
+        split(roiSize.height, tile.width, target, &tile.height, &rest.height, &n_rows);
+
+        /* second pass: cut every band into columns */
+        target = tile.width * tile.height * n_rows / numThreads;
+        target = IPP_MAX(min_item_number, target);
+        target = IPP_MAX(tile.height * minTileSize.width, target);
+        split(roiSize.width, tile.height, target, &tile.width, &rest.width, &n_cols);
+    }
+
+    pTileSize->width  = tile.width;
+    pTileSize->height = tile.height;
+}
+
+/**
+ * Derive the tile grid (*pSplit) plus the regular and last tile sizes for a ROI
+ * from a requested tile size; the request is clamped to the ROI first.
+ */
+static void split_to_tiles(IppiSizeL roiSize, IppiSizeL tile_size, IppiPointL *pSplit, IppiSizeL *pTileSize, IppiSizeL *pLastSize)
+{
+    /* a tile can never be larger than the ROI itself */
+    const IppSizeL tile_w = tile_size.width  > roiSize.width  ? roiSize.width  : tile_size.width;
+    const IppSizeL tile_h = tile_size.height > roiSize.height ? roiSize.height : tile_size.height;
+    /* pixels left over once the ROI is covered with whole tiles */
+    const IppSizeL rest_w = roiSize.width  % tile_w;
+    const IppSizeL rest_h = roiSize.height % tile_h;
+
+    pSplit->x = roiSize.width  / tile_w;
+    pSplit->y = roiSize.height / tile_h;
+
+    if (rest_h && rest_h < pSplit->y) {
+        /* fewer leftover rows than tile rows: the regular size is kept and the
+         * complete remainder is recorded in the last tile size */
+        pTileSize->width  = tile_w;
+        pTileSize->height = tile_h;
+        pLastSize->width  = tile_w + rest_w;
+        pLastSize->height = tile_h + rest_h;
+    } else {
+        /* the remainder is spread evenly over all tiles; the part that does
+         * not divide is added to the last tile size */
+        pTileSize->width  = tile_w + rest_w / pSplit->x;
+        pTileSize->height = tile_h + rest_h / pSplit->y;
+        pLastSize->width  = pTileSize->width + rest_w % pSplit->x;
+        pLastSize->height = tile_h + rest_h / pSplit->y + rest_h % pSplit->y;
+    }
+}
+
+static IppStatus get_tile_params_by_index(IppSizeL index, IppiPointL split_image, IppiSizeL tile_size, IppiSizeL tail_size, IppiPointL *pTileOffset, IppiSizeL *pTileSize)
+{ /* Maps a linear, row-major tile index to the offset and size of that tile; returns ippStsNoErr, ippStsNullPtrErr or ippStsSizeErr. */
+ IppSizeL i, j;
+ IppSizeL first_greater_index = 1;
+ int k;
+ int add;
+ if (pTileOffset == NULL || pTileSize == NULL) return ippStsNullPtrErr;
+ i = index / split_image.x; /* tile row */
+ j = index % split_image.x; /* tile column */
+
+ if (i >= split_image.y) /* index lies beyond the last tile row */
+ return ippStsSizeErr;
+
+ (*pTileOffset).x = j * tile_size.width;
+ (*pTileOffset).y = i * tile_size.height;
+ (*pTileSize).width = (j < split_image.x - 1) ? tile_size.width : tail_size.width; /* only the last column uses the tail width */
+ first_greater_index = tail_size.height - tile_size.height; /* extra rows carried by the tail height */
+ k = split_image.y - index; /* NOTE(review): derived from the linear index, not from the row i -- confirm this is intended when split_image.x > 1 */
+ add = split_image.y - first_greater_index; /* first tile row that gets an extra pixel row */
+ if ((first_greater_index < split_image.y) && (tail_size.height > tile_size.height) && (first_greater_index > 0)) {
+ if (i < split_image.y - first_greater_index) {
+ (*pTileSize).height = tile_size.height;
+ } else {
+ (*pTileSize).height = (tile_size.height + 1); /* the trailing tile rows are one pixel row taller */
+ if (k < first_greater_index)
+ (*pTileOffset).y = i * tile_size.height + (i - add) * 1; /* shift down by the number of taller rows above */
+ else
+ (*pTileOffset).y = i * tile_size.height;
+ }
+ } else {
+ (*pTileSize).height = (i < split_image.y - 1) ? tile_size.height : tail_size.height; /* only the last row uses the tail height */
+ }
+ return ippStsNoErr;
+}
+
+static void get_tile_by_index(int index, IppiPointL split_image, IppiSizeL tile_size, IppiSizeL tail_size, IppiPointL *pTileOffset, IppiSizeL *pTileSize)
+{
+    /* resolved into a zeroed local, so *pTileOffset is defined even when the lookup fails (its status is dropped) */
+    IppiPointL origin = { 0 };
+    get_tile_params_by_index(index, split_image, tile_size, tail_size, &origin, pTileSize);
+    *pTileOffset = (IppiPointL){ .x = (int)origin.x, .y = (int)origin.y };
+}
+
+static Ipp8u* get_image_pointer_8u(const Ipp8u* pData, IppSizeL dataStep, IppSizeL w, IppSizeL h, int ipp_data_type)
+{
+    /* Returns the address of sample (w, h) of a plane: h advances by dataStep
+     * bytes per row, w by one byte per sample for ipp8u and by sizeof(Ipp16u)
+     * for every other data type. The const qualifier of pData is dropped. */
+    const IppSizeL sample_bytes = (ipp_data_type == ipp8u) ? sizeof(Ipp8u) : sizeof(Ipp16u);
+    const Ipp8u *row = pData + h * dataStep;
+
+    return (Ipp8u*)row + w * sample_bytes;
+}
+
+static int alloc_internal_ipp_buffers(unsigned char **pbuffer[], IppResizeInfo_LT* tile_resize_info, int ipp_data_type)
+{
+    /* Allocates the pointer table and one IPP work buffer per tile (the last tile has its own size).
+     * Returns 0 or AVERROR(ENOMEM); on failure nothing is freed here, *pbuffer keeps what was allocated. */
+    const IppSizeL last = tile_resize_info->num_tiles - 1;
+
+    *pbuffer = av_calloc(tile_resize_info->num_tiles, sizeof(**pbuffer));
+    if (!*pbuffer) /* the table used to be dereferenced without this check */
+        return AVERROR(ENOMEM);
+    for (IppSizeL i = 0; i <= last; i++) {
+        int buffer_size = i == last ? tile_resize_info->last_tile_buffer_size : tile_resize_info->tile_buffer_size;
+        (*pbuffer)[i] = ipp_data_type == ipp8u ? ippsMalloc_8u(buffer_size) : (unsigned char*)ippsMalloc_16u(buffer_size);
+        if (!(*pbuffer)[i])
+            return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+/**
+ * Chroma tile job for the resize variant without border arguments.
+ * Jobs [0, num_tiles) of tile_resize_info_uv resize a tile of the U plane,
+ * any later job resizes tile (jobnr - num_tiles) of the V plane.
+ * arg and nb_jobs are not used.
+ * @return the IPP status of the failing or of the last call
+ */
+static int ipp_resize_uv420_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppResizeInfo_LT *info = &scale->tile_resize_info_uv;
+    /* plane 1 is U, plane 2 is V; both use the chroma spec and tiling */
+    const int is_v  = jobnr >= info->num_tiles;
+    const int plane = is_v ? 2 : 1;
+    const int tile  = is_v ? jobnr - info->num_tiles : jobnr;
+    IppiPointL dst_off, src_off;
+    IppiSizeL tile_size = { 0 };
+    const Ipp8u *src;
+    Ipp8u *dst, *work;
+    IppStatus sts;
+
+    get_tile_by_index(tile, info->split, info->tile_size, info->last_tile_size, &dst_off, &tile_size);
+    sts = ippiResizeGetSrcOffset_L(scale->ipp_spec_uv, dst_off, &src_off);
+    if (sts < 0)
+        return sts;
+
+    work = is_v ? scale->pbuffer_v[tile] : scale->pbuffer_u[tile];
+    src  = get_image_pointer_8u(scale->src_yuv[plane], scale->src_step[plane], src_off.x, src_off.y, scale->ipp_data_type);
+    dst  = get_image_pointer_8u(scale->dst_yuv[plane], scale->dst_step[plane], dst_off.x, dst_off.y, scale->ipp_data_type);
+    return scale->ippi_resize_c1r_func(src, (IppSizeL)scale->src_step[plane], dst, (IppSizeL)scale->dst_step[plane], dst_off, tile_size, scale->ipp_spec_uv, work);
+}
+
+/**
+ * Tile job for the resize variant without border arguments. Jobs below the
+ * luma tile count resize one Y tile; the remaining jobs are forwarded to
+ * ipp_resize_uv420_lt_func() with the job number rebased to zero.
+ * @return the IPP status of the failing or of the last call
+ */
+static int ipp_resize_y420_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppResizeInfo_LT *luma = &scale->tile_resize_info;
+    IppiPointL dst_off = { 0 }, src_off = { 0 };
+    IppiSizeL tile_size = { 0 };
+    const Ipp8u *src;
+    Ipp8u *dst;
+    IppStatus sts;
+
+    /* everything past the luma tiles is a chroma tile */
+    if (jobnr >= luma->num_tiles)
+        return ipp_resize_uv420_lt_func(ctx, NULL, jobnr - luma->num_tiles, nb_jobs);
+
+    get_tile_by_index(jobnr, luma->split, luma->tile_size, luma->last_tile_size, &dst_off, &tile_size);
+    sts = ippiResizeGetSrcOffset_L(scale->ipp_spec, dst_off, &src_off);
+    if (sts < 0)
+        return sts;
+
+    src = get_image_pointer_8u(scale->src_yuv[0], scale->src_step[0], src_off.x, src_off.y, scale->ipp_data_type);
+    dst = get_image_pointer_8u(scale->dst_yuv[0], scale->dst_step[0], dst_off.x, dst_off.y, scale->ipp_data_type);
+    return scale->ippi_resize_c1r_func(src, (IppSizeL)scale->src_step[0], dst, (IppSizeL)scale->dst_step[0], dst_off, tile_size, scale->ipp_spec, scale->pbuffer_y[jobnr]);
+}
+
+/**
+ * Chroma tile job for the resize variant that takes border arguments.
+ * Jobs [0, num_tiles) of tile_resize_info_uv resize a tile of the U plane,
+ * any later job resizes tile (jobnr - num_tiles) of the V plane.
+ * The resize function is always called with ippBorderRepl and a NULL
+ * border value.
+ * arg and nb_jobs are not used.
+ * @return the IPP status of the failing or of the last call
+ */
+static int ipp_resize_uv420_border_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppResizeInfo_LT *info = &scale->tile_resize_info_uv;
+    /* plane 1 is handled by the first num_tiles jobs, plane 2 by the rest */
+    const int is_v  = jobnr >= info->num_tiles;
+    const int plane = is_v ? 2 : 1;
+    const int tile  = is_v ? jobnr - info->num_tiles : jobnr;
+    IppiPointL dst_off, src_off;
+    IppiSizeL tile_size = { 0 };
+    const Ipp8u *src;
+    Ipp8u *dst, *work;
+    IppStatus sts;
+
+    get_tile_by_index(tile, info->split, info->tile_size, info->last_tile_size, &dst_off, &tile_size);
+    sts = ippiResizeGetSrcOffset_L(scale->ipp_spec_uv, dst_off, &src_off);
+    if (sts < 0)
+        return sts;
+
+    work = is_v ? scale->pbuffer_v[tile] : scale->pbuffer_u[tile];
+    src  = get_image_pointer_8u(scale->src_yuv[plane], scale->src_step[plane], src_off.x, src_off.y, scale->ipp_data_type);
+    dst  = get_image_pointer_8u(scale->dst_yuv[plane], scale->dst_step[plane], dst_off.x, dst_off.y, scale->ipp_data_type);
+    return scale->ippi_resize_border_c1r_func(src, (IppSizeL)scale->src_step[plane], dst, (IppSizeL)scale->dst_step[plane], dst_off, tile_size, ippBorderRepl, NULL, scale->ipp_spec_uv, work);
+}
+
+/**
+ * Tile job for the resize variant that takes border arguments. Jobs below
+ * the luma tile count resize one Y tile with ippBorderRepl; the remaining
+ * jobs are forwarded to ipp_resize_uv420_border_lt_func() with the job
+ * number rebased to zero.
+ * @return the IPP status of the failing or of the last call
+ */
+static int ipp_resize_y420_border_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppResizeInfo_LT *luma = &scale->tile_resize_info;
+    IppiPointL dst_off = { 0 }, src_off = { 0 };
+    IppiSizeL tile_size = { 0 };
+    const Ipp8u *src;
+    Ipp8u *dst;
+    IppStatus sts;
+
+    /* everything past the luma tiles is a chroma tile */
+    if (jobnr >= luma->num_tiles)
+        return ipp_resize_uv420_border_lt_func(ctx, NULL, jobnr - luma->num_tiles, nb_jobs);
+
+    get_tile_by_index(jobnr, luma->split, luma->tile_size, luma->last_tile_size, &dst_off, &tile_size);
+    sts = ippiResizeGetSrcOffset_L(scale->ipp_spec, dst_off, &src_off);
+    if (sts < 0)
+        return sts;
+
+    src = get_image_pointer_8u(scale->src_yuv[0], scale->src_step[0], src_off.x, src_off.y, scale->ipp_data_type);
+    dst = get_image_pointer_8u(scale->dst_yuv[0], scale->dst_step[0], dst_off.x, dst_off.y, scale->ipp_data_type);
+    return scale->ippi_resize_border_c1r_func(src, (IppSizeL)scale->src_step[0], dst, (IppSizeL)scale->dst_step[0], dst_off, tile_size, ippBorderRepl, NULL, scale->ipp_spec, scale->pbuffer_y[jobnr]);
+}
+
+/* Single-threaded path: resizes the Y, U and V planes in turn, all with work buffer pbuffer_y[0].
+ * Returns the first error (< 0) if any, else the first warning (> 0), else ippStsNoErr. */
+static IppStatus ipp_resize_yuv420_l(IPPScaleContext *scale, const Ipp8u* src_yuv[], int* src_step, Ipp8u* dst_yuv[], int* dst_step)
+{
+    IppStatus ret = ippStsNoErr;
+    for (int p = 0; p < 3; p++) { /* p == 0 is luma; planes 1 and 2 share the chroma size and spec */
+        IppStatus sts = scale->ippi_resize_c1r_func(src_yuv[p], (IppSizeL)src_step[p], dst_yuv[p], (IppSizeL)dst_step[p], (IppiPointL){ 0, 0 }, p ? scale->dst_size_uv : scale->dst_size, p ? scale->ipp_spec_uv : scale->ipp_spec, scale->pbuffer_y[0]);
+        if (ret == ippStsNoErr || (ret > 0 && sts < 0)) /* OR-ing the three codes produced values that are no IPP status */
+            ret = sts;
+    }
+    return ret;
+}
+
+/* Single-threaded path with ippBorderRepl borders: resizes Y, U and V in turn, all with work buffer pbuffer_y[0].
+ * Returns the first error (< 0) if any, else the first warning (> 0), else ippStsNoErr. */
+static IppStatus ipp_resize_yuv420_border_l(IPPScaleContext *scale, const Ipp8u* src_yuv[], int* src_step, Ipp8u* dst_yuv[], int* dst_step)
+{
+    IppStatus ret = ippStsNoErr;
+    for (int p = 0; p < 3; p++) { /* p == 0 is luma; planes 1 and 2 share the chroma size and spec */
+        IppStatus sts = scale->ippi_resize_border_c1r_func(src_yuv[p], src_step[p], dst_yuv[p], dst_step[p], (IppiPointL){ 0, 0 }, p ? scale->dst_size_uv : scale->dst_size, ippBorderRepl, NULL, p ? scale->ipp_spec_uv : scale->ipp_spec, scale->pbuffer_y[0]);
+        if (ret == ippStsNoErr || (ret > 0 && sts < 0)) /* OR-ing the three codes produced values that are no IPP status */
+            ret = sts;
+    }
+    return ret;
+}
+
+// Init ipp functions pointers
+// Get sizes for internal and initialization buffers
+static IppStatus ipp_init_resize(AVFilterLink *inlink, AVFilterLink *outlink)
+{
+ AVFilterContext *ctx = outlink->src;
+ IPPScaleContext *scale = ctx->priv;
+ IppSizeL spec_size=0, init_size=0, buffer_size=0;
+ IppSizeL spec_size_uv=0, init_size_uv=0,buffer_size_uv=0;
+ IppStatus ipp_sts, ipp_sts_uv;
+ IppSizeL min_item_number=0;
+ IppiSizeL minTileSize = { 0,0 };
+
+ //if scale->ipp_threading==0 don't use threading independently on other params
+ if (scale->ipp_threading == 1) {
+ if (ctx->nb_threads > 1) {
+ scale->ipp_threads = ff_filter_get_nb_threads(ctx);
+ } else {
+ if (ctx->nb_threads <= 0)
+ scale->ipp_threads = av_cpu_count();
+ if (ctx->nb_threads == 1)
+ scale->ipp_threading = 0;
+ }
+ }
+
+ scale->src_size.width = inlink->w;
+ scale->src_size.height = inlink->h;
+ scale->src_size_uv.width = inlink->w >> 1;
+ scale->src_size_uv.height = inlink->h >> 1;
+
+ scale->dst_size.width = outlink->w;
+ scale->dst_size.height = outlink->h;
+ scale->dst_size_uv.width = outlink->w >> 1;
+ scale->dst_size_uv.height = outlink->h >> 1;
+
+ scale->ipp_data_type = ipp8u;
+ if (inlink->format == AV_PIX_FMT_YUV420P10LE) {
+ scale->ipp_data_type = ipp16u;
+ }
+
+ scale->ippi_resize_c1r_func = NULL;
+ scale->ippi_resize_border_c1r_func = NULL;
+
+ ipp_sts = ippiResizeGetSize_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->interpolation, scale->ipp_antialiasing,
+ &spec_size, &init_size);
+ if (ipp_sts) {
+ av_log(ctx, AV_LOG_ERROR, "ippiResizeGetSize Y error: %d\n", ipp_sts);
+ return ipp_sts;
+ }
+ if (!scale->ipp_spec)
+ scale->ipp_spec = (IppiResizeSpec *)ippsMalloc_8u(spec_size);
+ if (!scale->ipp_spec) {
+ av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize Y spec \n");
+ return AVERROR(ENOMEM);
+ }
+
+ ipp_sts_uv = ippiResizeGetSize_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, scale->interpolation, scale->ipp_antialiasing,
+ &spec_size_uv, &init_size_uv);
+ if (ipp_sts_uv) {
+ av_log(ctx, AV_LOG_ERROR, "ippiResizeGetSize UV error: %d\n", ipp_sts_uv);
+ return ipp_sts;
+ }
+ if (!scale->ipp_spec_uv)
+ scale->ipp_spec_uv = (IppiResizeSpec *)ippsMalloc_8u(spec_size_uv);
+ if (!scale->ipp_spec_uv) {
+ av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize UV spec \n");
+ return AVERROR(ENOMEM);
+ }
+
+ // allocate initialization buffer, use the max Y capacity
+ init_size = init_size > init_size_uv ? init_size : init_size_uv;
+ if (init_size &&
+ ((scale->interpolation == ippCubic) || (scale->interpolation == ippLanczos) ||
+ ((scale->ipp_antialiasing == 1) && (scale->interpolation == ippLinear)))) {
+ if (!scale->init_buffer){
+ if (scale->ipp_data_type == ipp8u)
+ scale->init_buffer = ippsMalloc_8u(init_size);
+ else
+ scale->init_buffer = (unsigned char*)ippsMalloc_16u(init_size);
+ }
+ if (!scale->init_buffer) {
+ av_log(ctx, AV_LOG_ERROR,
+ "Cannot allocate memory for resize init buffer");
+ return AVERROR(ENOMEM);
+ }
+ }
+ // init ipp resizer
+ if (scale->interpolation == ippSuper) {
+ ipp_sts = ippiResizeSuperInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->ipp_spec);
+ ipp_sts_uv = ippiResizeSuperInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, scale->ipp_spec_uv);
+ scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_lt_func;
+ scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_lt_func;
+ scale->ipp_resize_yuv420_func = ipp_resize_yuv420_l;
+ min_item_number = RESIZE_SUPER_MIN_PIX_NUMBER;
+ minTileSize = RESIZE_SUPER_MIN_TILE_SIZE;
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_c1r_func = ippiResizeSuper_8u_C1R_L;
+ else
+ scale->ippi_resize_c1r_func = (ippiResize_C1R_L_ptr)ippiResizeSuper_16u_C1R_L;
+ } else if (scale->interpolation == ippLinear) {
+ if (scale->ipp_antialiasing == 1) {
+ ipp_sts = ippiResizeAntialiasingLinearInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->ipp_spec,
+ scale->init_buffer);
+ ipp_sts_uv = ippiResizeAntialiasingLinearInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, scale->ipp_spec_uv,
+ scale->init_buffer);
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_border_c1r_func = ippiResizeAntialiasing_8u_C1R_L;
+ if (scale->ipp_data_type == ipp16u)
+ scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+ } else {
+ ipp_sts = ippiResizeLinearInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->ipp_spec);
+ ipp_sts_uv = ippiResizeLinearInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, scale->ipp_spec_uv);
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_border_c1r_func = ippiResizeLinear_8u_C1R_L;
+ if (scale->ipp_data_type == ipp16u)
+ scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeLinear_16u_C1R_L;
+ }
+ scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_border_lt_func;
+ scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_border_lt_func;
+ scale->ipp_resize_yuv420_func = ipp_resize_yuv420_border_l;
+ min_item_number = RESIZE_LINEAR_MIN_PIX_NUMBER;
+ minTileSize = RESIZE_LINEAR_MIN_TILE_SIZE;
+ } else if (scale->interpolation == ippLanczos) {
+ // here the simplest Lanczos filtering with numLobes =2 is used.
+ if (scale->ipp_antialiasing == 1) {
+ ipp_sts = ippiResizeAntialiasingLanczosInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 2, scale->ipp_spec,
+ scale->init_buffer);
+ ipp_sts_uv = ippiResizeAntialiasingLanczosInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 2, scale->ipp_spec_uv,
+ scale->init_buffer);
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_border_c1r_func = ippiResizeAntialiasing_8u_C1R_L;
+ else
+ scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+ } else {
+ ipp_sts = ippiResizeLanczosInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 2, scale->ipp_spec,
+ scale->init_buffer);
+ ipp_sts_uv = ippiResizeLanczosInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 2, scale->ipp_spec_uv,
+ scale->init_buffer);
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_border_c1r_func = ippiResizeLanczos_8u_C1R_L;
+ else
+ scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeLanczos_16u_C1R_L;
+ }
+ scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_border_lt_func;
+ scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_border_lt_func;
+ scale->ipp_resize_yuv420_func = ipp_resize_yuv420_border_l;
+ min_item_number = RESIZE_LANCZOS_MIN_PIX_NUMBER;
+ minTileSize = RESIZE_LANCZOS_MIN_TILE_SIZE;
+ } else if (scale->interpolation == ippCubic) {
+ // cubic IPPI_INTER_CUBIC2P_CATMULLROM is used (B=0, C=1/2)
+ if (scale->ipp_antialiasing == 1) {
+ ipp_sts = ippiResizeAntialiasingCubicInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 0, 0.5, scale->ipp_spec,
+ scale->init_buffer);
+ ipp_sts_uv = ippiResizeAntialiasingCubicInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 0, 0.5, scale->ipp_spec_uv,
+ scale->init_buffer);
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_border_c1r_func = ippiResizeAntialiasing_8u_C1R_L;
+ else
+ scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+ } else {
+ ipp_sts = ippiResizeCubicInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 0, 0.5, scale->ipp_spec,
+ scale->init_buffer);
+ ipp_sts_uv = ippiResizeCubicInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 0, 0.5, scale->ipp_spec_uv,
+ scale->init_buffer);
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_border_c1r_func = ippiResizeCubic_8u_C1R_L;
+ else
+ scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeCubic_16u_C1R_L;
+ }
+ scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_border_lt_func;
+ scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_border_lt_func;
+ scale->ipp_resize_yuv420_func = ipp_resize_yuv420_border_l;
+ min_item_number = RESIZE_CUBIC_MIN_PIX_NUMBER;
+ minTileSize = RESIZE_CUBIC_MIN_TILE_SIZE;
+ } else if (scale->interpolation == ippNearest) {
+ ipp_sts = ippiResizeNearestInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->ipp_spec);
+ ipp_sts_uv = ippiResizeNearestInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, scale->ipp_spec_uv);
+ scale->ipp_resize_yuv420_func = ipp_resize_yuv420_l;
+ scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_lt_func;
+ scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_lt_func;
+ min_item_number = RESIZE_NEAREST_MIN_PIX_NUMBER;
+ minTileSize = RESIZE_NEAREST_MIN_TILE_SIZE;
+ if (scale->ipp_data_type == ipp8u)
+ scale->ippi_resize_c1r_func = ippiResizeNearest_8u_C1R_L;
+ else
+ scale->ippi_resize_c1r_func = (ippiResize_C1R_L_ptr)ippiResizeNearest_16u_C1R_L;
+ }
+ if (ipp_sts) {
+ av_log(ctx, AV_LOG_ERROR, "ippiResizeInit Y error: %d\n", ipp_sts);
+ return ipp_sts;
+ }
+ if (ipp_sts_uv) {
+ av_log(ctx, AV_LOG_ERROR, "ippiResizeInit UV error: %d\n", ipp_sts_uv);
+ return ipp_sts;
+ }
+
+ if (scale->ipp_threading) {
+ /*Split to tiles !!*/
+ IppiSizeL tile_size, tileSizeUV;
+ int num_threads_y, num_threads_uv;
+ // 2/3 of the threads work on Y, 1/3 = 1/6 + 1/6 of threads work on U and V
+ num_threads_uv = (scale->ipp_threads > 6) ? scale->ipp_threads / 6 : 1;
+ num_threads_y = (scale->ipp_threads > 3) ? scale->ipp_threads - 2 * num_threads_uv : 1;
+
+ get_tilesize_simple(scale->dst_size, min_item_number, minTileSize, &tile_size, num_threads_y);
+ split_to_tiles(scale->dst_size, tile_size, &scale->tile_resize_info.split, &scale->tile_resize_info.tile_size, &scale->tile_resize_info.last_tile_size);
+
+ //need to do it for UV planes (not just divide Y tile Size by 2) to deal with uneven tile sizes and num_tiles difference
+ get_tilesize_simple(scale->dst_size_uv, min_item_number, minTileSize, &tileSizeUV, num_threads_uv);
+ split_to_tiles(scale->dst_size_uv, tileSizeUV, &scale->tile_resize_info_uv.split, &scale->tile_resize_info_uv.tile_size, &scale->tile_resize_info_uv.last_tile_size);
+
+ scale->tile_resize_info.num_tiles = scale->tile_resize_info.split.x * scale->tile_resize_info.split.y;
+ scale->tile_resize_info_uv.num_tiles = scale->tile_resize_info_uv.split.x * scale->tile_resize_info_uv.split.y;
+ av_log(NULL, AV_LOG_INFO, "Intel IPP uses %lld tiles (threads) for Y and %lld tiles for UV scale \n", scale->tile_resize_info.num_tiles,
+ scale->tile_resize_info_uv.num_tiles * 2);
+
+ ipp_sts = ipp_resize_get_tilebuffer_size(scale->ipp_spec, &scale->tile_resize_info);
+ ipp_sts_uv = ipp_resize_get_tilebuffer_size(scale->ipp_spec_uv, &scale->tile_resize_info_uv);
+ } else {
+ ipp_sts = ippiResizeGetBufferSize_L(scale->ipp_spec, scale->dst_size, 1, &buffer_size);
+ ipp_sts_uv = ippiResizeGetBufferSize_L(scale->ipp_spec_uv, scale->dst_size_uv, 1, &buffer_size_uv);
+ scale->tile_resize_info.last_tile_buffer_size = buffer_size;
+ scale->tile_resize_info_uv.last_tile_buffer_size = buffer_size_uv;
+ }
+ if (!scale->pbuffer_y) {
+ int err = alloc_internal_ipp_buffers(&scale->pbuffer_y, &scale->tile_resize_info, scale->ipp_data_type);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer Y");
+ return AVERROR(ENOMEM);
+ }
+ }
+ if (!scale->pbuffer_u) {
+ int err = alloc_internal_ipp_buffers(&scale->pbuffer_u, &scale->tile_resize_info_uv, scale->ipp_data_type);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer U");
+ return AVERROR(ENOMEM);
+ }
+ }
+ if (!scale->pbuffer_v) {
+ int err = alloc_internal_ipp_buffers(&scale->pbuffer_v, &scale->tile_resize_info_uv, scale->ipp_data_type);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer V");
+ return AVERROR(ENOMEM);
+ }
+ }
+ return ipp_sts;
+}
+
+/**
+ * Configure the output link of the filter.
+ *
+ * Evaluates the width/height expressions, applies the aspect-ratio and
+ * divisibility constraints once, downgrades option combinations that the
+ * selected interpolation does not support, derives the output sample aspect
+ * ratio and finally (re)initializes the IPP resize state.
+ *
+ * @param outlink output link of the scale_ipp filter instance
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int ippscale_config_props(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx    = outlink->src;
+    AVFilterLink    *inlink = outlink->src->inputs[0];
+    IPPScaleContext *s      = ctx->priv;
+    int w, h;
+    int ret;
+
+    if ((ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
+                                        &w, &h)) < 0)
+        return ret;
+
+    /* Apply the constraints exactly once; feeding already adjusted values
+     * through the adjustment a second time may round them again. */
+    ff_scale_adjust_dimensions(inlink, &w, &h, s->force_original_aspect_ratio,
+                               s->force_divisible_by);
+
+    if (((int64_t)h * inlink->w) > INT_MAX || ((int64_t)w * inlink->h) > INT_MAX)
+        av_log(ctx, AV_LOG_ERROR,
+               "Rescaled value for width or height is too big.\n");
+
+    outlink->w = w;
+    outlink->h = h;
+
+    if ((s->interpolation == ippSuper || s->interpolation == ippNearest) &&
+        s->ipp_antialiasing) {
+        s->ipp_antialiasing = 0;
+        av_log(ctx, AV_LOG_WARNING,
+               "Supersampling and Nearest neighbor interpolations don't support antialiasing, "
+               "antialiasing is disabled.\n");
+    }
+
+    /* Supersampling is only usable when neither dimension grows. */
+    if (s->interpolation == ippSuper &&
+        (outlink->w > inlink->w || outlink->h > inlink->h)) {
+        s->interpolation = ippCubic;
+        av_log(ctx, AV_LOG_WARNING,
+               "Supersampling not supported for upscaling, using cubic "
+               "instead.\n");
+    }
+
+    /* Logged after the fallbacks so that the reported interpolation is the
+     * one that is actually going to be used. */
+    av_log(ctx, AV_LOG_INFO,
+           "Intel IPP based scaling w:%d h:%d -> w:%d h:%d, interpolation %d \n",
+           inlink->w, inlink->h, outlink->w, outlink->h, s->interpolation);
+
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio =
+            av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h},
+                     inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    /* A failed IPP setup must fail the link configuration; the value may be
+     * a raw IPP status rather than an AVERROR code, so report it as an
+     * external library error. */
+    ret = ipp_init_resize(inlink, outlink);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Intel IPP resize initialization failed: %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Sanity-check the parsed width and height expressions.
+ *
+ * An expression that refers to its own output dimension is rejected. A pair
+ * of expressions that refer to each other's output dimension only triggers a
+ * warning.
+ *
+ * @param ctx filter context whose private data holds the parsed expressions
+ * @return 0 if the expressions are usable, AVERROR(EINVAL) otherwise
+ */
+static int check_exprs(AVFilterContext *ctx)
+{
+    IPPScaleContext *s = ctx->priv;
+    unsigned w_refs[VARS_NB] = {0};
+    unsigned h_refs[VARS_NB] = {0};
+    int w_reads_out_h, h_reads_out_w;
+
+    /* At least one of the two expressions has to be parsed already. */
+    if (!(s->w_pexpr || s->h_pexpr))
+        return AVERROR(EINVAL);
+
+    if (s->w_pexpr)
+        av_expr_count_vars(s->w_pexpr, w_refs, VARS_NB);
+    if (s->h_pexpr)
+        av_expr_count_vars(s->h_pexpr, h_refs, VARS_NB);
+
+    if (w_refs[VAR_OUT_W] || w_refs[VAR_OW]) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Width expression cannot be self-referencing: '%s'.\n",
+               s->w_expr);
+        return AVERROR(EINVAL);
+    }
+
+    if (h_refs[VAR_OUT_H] || h_refs[VAR_OH]) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Height expression cannot be self-referencing: '%s'.\n",
+               s->h_expr);
+        return AVERROR(EINVAL);
+    }
+
+    w_reads_out_h = w_refs[VAR_OUT_H] || w_refs[VAR_OH];
+    h_reads_out_w = h_refs[VAR_OUT_W] || h_refs[VAR_OW];
+    if (w_reads_out_h && h_reads_out_w)
+        av_log(ctx, AV_LOG_WARNING,
+               "Circular references detected for width '%s' and height '%s' - "
+               "possibly invalid.\n",
+               s->w_expr, s->h_expr);
+
+    return 0;
+}
+
+/**
+ * Parse a new width or height expression and install it.
+ *
+ * If an expression was already installed in *pexpr_ptr, the output link is
+ * reconfigured with the new one. On any failure the previous option string
+ * and the previous parsed expression are put back.
+ *
+ * @param ctx       filter context
+ * @param str_expr  current option string, or NULL if the option string must
+ *                  not be replaced
+ * @param pexpr_ptr location of the parsed expression to replace
+ * @param var       option name, used for av_opt_set() and for messages
+ * @param args      new expression text
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int scale_parse_expr(AVFilterContext *ctx, char *str_expr,
+                            AVExpr **pexpr_ptr, const char *var,
+                            const char *args)
+{
+    IPPScaleContext *s = ctx->priv;
+    char   *saved_str  = NULL;
+    AVExpr *saved_expr;
+    int     ret;
+
+    /* Keep a copy of the current option string so it can be restored. */
+    if (str_expr) {
+        saved_str = av_strdup(str_expr);
+        if (!saved_str)
+            return AVERROR(ENOMEM);
+        av_opt_set(s, var, args, 0);
+    }
+
+    /* Detach the current expression; a non-NULL value also means that the
+     * filter has been set up before and has to be reconfigured. */
+    saved_expr = *pexpr_ptr;
+    *pexpr_ptr = NULL;
+
+    ret = av_expr_parse(pexpr_ptr, args, var_names, NULL, NULL, NULL, NULL, 0,
+                        ctx);
+    if (ret < 0)
+        av_log(ctx, AV_LOG_ERROR, "Cannot parse expression for %s: '%s'\n", var,
+               args);
+    else
+        ret = check_exprs(ctx);
+
+    if (ret >= 0 && saved_expr)
+        ret = ippscale_config_props(ctx->outputs[0]);
+
+    if (ret >= 0) {
+        /* The new expression is in place, drop the saved state. */
+        av_expr_free(saved_expr);
+        av_freep(&saved_str);
+        return 0;
+    }
+
+    /* Failure: discard whatever was parsed and restore the previous state. */
+    av_expr_free(*pexpr_ptr);
+    *pexpr_ptr = NULL;
+    if (saved_str) {
+        av_opt_set(s, var, saved_str, 0);
+        av_free(saved_str);
+    }
+    if (saved_expr)
+        *pexpr_ptr = saved_expr;
+
+    return ret;
+}
+
+/*
+ * CPU feature masks passed to ippSetCpuFeatures().
+ * The masks below have the same value on 32-bit and 64-bit builds.
+ */
+#define IPP_SSE2   ( ippCPUID_MMX | ippCPUID_SSE | ippCPUID_SSE2 )
+#define IPP_SSE42  ( IPP_SSE2 | ippCPUID_SSE3 | ippCPUID_SSSE3 | ippCPUID_SSE41 | ippCPUID_SSE42 )
+#define IPP_AVX    ( IPP_SSE42 | ippCPUID_AVX | ippAVX_ENABLEDBYOS | ippCPUID_F16C )
+#define IPP_AVX2   ( IPP_AVX | ippCPUID_AVX2 | ippCPUID_MOVBE | ippCPUID_PREFETCHW )
+
+#if defined (_M_AMD64) || defined (__x86_64__)
+
+/* 64-bit builds: every level has its own mask, SSE4 shares the SSSE3 one. */
+#define IPP_SSE3   ( IPP_SSE2 | ippCPUID_SSE3 )
+#define IPP_SSSE3  ( IPP_SSE3 | ippCPUID_SSSE3 )
+#define IPP_SSE4   IPP_SSSE3
+#define IPP_AVX512 ( IPP_AVX2 | ippCPUID_AVX512F | ippCPUID_AVX512CD | ippCPUID_AVX512VL | ippCPUID_AVX512BW | ippCPUID_AVX512DQ | ippAVX512_ENABLEDBYOS )
+
+#else
+
+/* 32-bit builds: SSE3, SSSE3 and SSE4 use the SSE2 mask, AVX-512 uses the
+ * AVX2 mask. */
+#define IPP_SSE3       IPP_SSE2
+#define IPP_SSE3_MOVBE ( IPP_SSE2 | ippCPUID_SSE3 | ippCPUID_SSSE3 | ippCPUID_MOVBE )
+#define IPP_SSSE3      IPP_SSE2
+#define IPP_SSE4       IPP_SSE2
+#define IPP_AVX512     IPP_AVX2
+
+#endif
+
+
+/**
+ * Initialize IPP dispatching from the CPU flags reported by libavutil.
+ *
+ * The first matching instruction-set level, checked from AVX-512 down to
+ * SSE3, is requested through ippSetCpuFeatures(). If none of them is
+ * reported, ippInit() is called instead.
+ *
+ * @return ippStsNoErr if the feature mask was accepted, ippStsErr if
+ *         ippSetCpuFeatures() returned anything else, or the status of
+ *         ippInit() when no level matched
+ */
+static IppStatus ipp_init_preferred_cpu(void)
+{
+    const int cpu_flags = av_get_cpu_flags();
+    Ipp64u features;
+
+    if (EXTERNAL_AVX512(cpu_flags))
+        features = IPP_AVX512;
+    else if (EXTERNAL_AVX2(cpu_flags))
+        features = IPP_AVX2;
+    else if (EXTERNAL_AVX(cpu_flags))
+        features = IPP_AVX;
+    else if (EXTERNAL_SSE42(cpu_flags))
+        features = IPP_SSE42;
+    else if (EXTERNAL_SSE4(cpu_flags))
+        features = IPP_SSE4;
+    else if (EXTERNAL_SSSE3(cpu_flags))
+        features = IPP_SSSE3;
+    else if (EXTERNAL_SSE3(cpu_flags))
+        features = IPP_SSE3;
+    else
+        return ippInit();
+
+    /* Every status other than "no error" is collapsed into a generic error. */
+    return ippSetCpuFeatures(features) == ippStsNoErr ? ippStsNoErr : ippStsErr;
+}
+
+/**
+ * Filter init callback: normalize the size options, parse the width/height
+ * expressions, reset the IPP state and select the IPP CPU dispatching.
+ *
+ * @param ctx  filter context
+ * @param opts remaining user options; ownership is taken over by the filter
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int ippscale_init(AVFilterContext *ctx, AVDictionary **opts)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppStatus ipp_sts;
+    int ret;
+
+    /* A lone "w" value is treated as a size string such as "640x480". */
+    if (scale->w_expr && !scale->h_expr)
+        FFSWAP(char *, scale->w_expr, scale->size_str);
+
+    if (scale->size_str) {
+        char buf[32];
+        if ((ret = av_parse_video_size(&scale->w, &scale->h, scale->size_str)) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid size '%s'\n", scale->size_str);
+            return ret;
+        }
+        snprintf(buf, sizeof(buf), "%d", scale->w);
+        av_opt_set(scale, "w", buf, 0);
+        snprintf(buf, sizeof(buf), "%d", scale->h);
+        av_opt_set(scale, "h", buf, 0);
+    }
+    /* Missing dimensions default to the input size. */
+    if (!scale->w_expr)
+        av_opt_set(scale, "w", "iw", 0);
+    if (!scale->h_expr)
+        av_opt_set(scale, "h", "ih", 0);
+
+    ret = scale_parse_expr(ctx, NULL, &scale->w_pexpr, "width", scale->w_expr);
+    if (ret < 0)
+        return ret;
+
+    ret = scale_parse_expr(ctx, NULL, &scale->h_pexpr, "height", scale->h_expr);
+    if (ret < 0)
+        return ret;
+
+    scale->flags = 0;
+    scale->opts  = *opts;
+    *opts        = NULL;
+
+    scale->ipp_spec    = NULL;
+    scale->ipp_spec_uv = NULL;
+    scale->init_buffer = NULL;
+    scale->pbuffer_y   = NULL;
+    scale->pbuffer_u   = NULL;
+    scale->pbuffer_v   = NULL;
+    scale->tile_resize_info.num_tiles    = 1;
+    scale->tile_resize_info_uv.num_tiles = 1;
+
+    /* IPP statuses are not AVERROR codes: negative values are errors and are
+     * reported as an external library failure, positive values are IPP
+     * warnings and must not be returned as a non-zero init result. */
+    ipp_sts = ipp_init_preferred_cpu();
+    if (ipp_sts < 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Intel IPP initialization failed, IPP status %d\n", (int)ipp_sts);
+        return AVERROR_EXTERNAL;
+    }
+    if (ipp_sts > 0)
+        av_log(ctx, AV_LOG_WARNING,
+               "Intel IPP initialization returned warning status %d\n",
+               (int)ipp_sts);
+
+    return 0;
+}
+
+/**
+ * Filter uninit callback: release the per-tile work buffers of the three
+ * planes, the IPP init buffer and specs, the parsed expressions and the
+ * stored option dictionary.
+ *
+ * @param ctx filter context
+ */
+static av_cold void ippscale_uninit(AVFilterContext *ctx)
+{
+    IPPScaleContext *s = ctx->priv;
+    int tile;
+
+    /* Y plane: one buffer per luma tile, then the pointer array itself. */
+    if (s->pbuffer_y) {
+        tile = 0;
+        while (tile < s->tile_resize_info.num_tiles) {
+            if (s->pbuffer_y[tile])
+                ippsFree(s->pbuffer_y[tile]);
+            tile++;
+        }
+        av_free(s->pbuffer_y);
+    }
+
+    /* U and V planes use the chroma tile count. */
+    if (s->pbuffer_u) {
+        tile = 0;
+        while (tile < s->tile_resize_info_uv.num_tiles) {
+            if (s->pbuffer_u[tile])
+                ippsFree(s->pbuffer_u[tile]);
+            tile++;
+        }
+        av_free(s->pbuffer_u);
+    }
+    if (s->pbuffer_v) {
+        tile = 0;
+        while (tile < s->tile_resize_info_uv.num_tiles) {
+            if (s->pbuffer_v[tile])
+                ippsFree(s->pbuffer_v[tile]);
+            tile++;
+        }
+        av_free(s->pbuffer_v);
+    }
+
+    if (s->init_buffer)
+        ippsFree(s->init_buffer);
+    if (s->ipp_spec)
+        ippsFree(s->ipp_spec);
+    if (s->ipp_spec_uv)
+        ippsFree(s->ipp_spec_uv);
+
+    av_expr_free(s->w_pexpr);
+    av_expr_free(s->h_pexpr);
+    s->w_pexpr = NULL;
+    s->h_pexpr = NULL;
+
+    av_dict_free(&s->opts);
+}
+
+/**
+ * Advertise the pixel formats listed in supported_formats on all links.
+ *
+ * @param ctx filter context
+ * @return AVERROR(ENOMEM) if the format list cannot be created, otherwise
+ *         the result of ff_set_common_formats()
+ */
+static int ippscale_query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+
+    formats = ff_make_format_list(supported_formats);
+    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
+}
+
+/**
+ * Scale one input frame into a newly allocated output frame.
+ *
+ * The input frame is consumed on every path, including the error paths.
+ *
+ * @param link      input link the frame arrived on
+ * @param in        input frame; freed by this function
+ * @param frame_out set to the scaled frame on success, NULL on failure
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out)
+{
+    AVFilterContext *ctx     = link->dst;
+    IPPScaleContext *scale   = ctx->priv;
+    AVFilterLink    *outlink = ctx->outputs[0];
+    AVFrame *out;
+
+    *frame_out = NULL;
+
+    /* Mirror the properties of the incoming frame on the input link. */
+    link->dst->inputs[0]->format = in->format;
+    link->dst->inputs[0]->w      = in->width;
+    link->dst->inputs[0]->h      = in->height;
+
+    link->dst->inputs[0]->sample_aspect_ratio.den = in->sample_aspect_ratio.den;
+    link->dst->inputs[0]->sample_aspect_ratio.num = in->sample_aspect_ratio.num;
+
+    if (in->interlaced_frame) {
+        // tbd do deinterlaced path
+        av_log(ctx, AV_LOG_ERROR, "Interlaced input format is not supported. \n");
+        /* The frame is owned by this function, so release it here as well. */
+        av_frame_free(&in);
+        return AVERROR(ENOSYS);
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    *frame_out = out;
+
+    av_frame_copy_props(out, in);
+    out->width  = outlink->w;
+    out->height = outlink->h;
+
+    /* Publish the plane pointers and strides of both frames in the context. */
+    for (int plane = 0; plane < 3; plane++) {
+        scale->src_yuv[plane]  = in->data[plane];
+        scale->dst_yuv[plane]  = out->data[plane];
+        scale->src_step[plane] = in->linesize[plane];
+        scale->dst_step[plane] = out->linesize[plane];
+    }
+
+    /* NOTE(review): neither resize path reports a status back to this
+     * function, so a failing IPP call goes unnoticed here - confirm whether
+     * the callbacks return a status that should be checked. */
+    if (scale->ipp_threading) {
+        /* One job per Y tile plus one per U tile and one per V tile.
+         * NOTE(review): the job argument is the address of the local context
+         * pointer - confirm that the tile callback expects that. */
+        ctx->internal->execute(ctx, scale->ippi_resize_y420_lt_ptr, &scale, NULL,
+                               scale->tile_resize_info.num_tiles +
+                               2 * scale->tile_resize_info_uv.num_tiles);
+    } else {
+        scale->ipp_resize_yuv420_func(scale, (const Ipp8u**)in->data, in->linesize,
+                                      out->data, out->linesize);
+    }
+
+    av_frame_free(&in);
+    return 0;
+}
+
+/**
+ * Input pad callback: scale the frame and pass the result downstream.
+ *
+ * @param link input link the frame arrived on
+ * @param in   input frame, handed over to scale_frame()
+ * @return the result of ff_filter_frame() if an output frame was produced,
+ *         otherwise the status returned by scale_frame()
+ */
+static int ippscale_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    AVFrame *scaled = NULL;
+    int status = scale_frame(link, in, &scaled);
+
+    if (!scaled)
+        return status;
+
+    return ff_filter_frame(link->dst->outputs[0], scaled);
+}
+
+#define OFFSET(x) offsetof(IPPScaleContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+/* Same as FLAGS, plus the runtime-param flag. */
+#define TFLAGS                                                 \
+    (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM |   \
+     AV_OPT_FLAG_RUNTIME_PARAM)
+
+/* User-visible options of the scale_ipp filter. */
+static const AVOption ippscale_options[] = {
+    /* output size, either as expressions or as a size string */
+    {"w",      "Output video width",  OFFSET(w_expr),   AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"width",  "Output video width",  OFFSET(w_expr),   AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"h",      "Output video height", OFFSET(h_expr),   AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"height", "Output video height", OFFSET(h_expr),   AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"size",   "set video size",      OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS},
+    {"s",      "set video size",      OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS},
+    {"force_original_aspect_ratio", "Change w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, FLAGS, "force_oar"},
+    {"disable",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
+    {"decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
+    {"increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
+    {"force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
+
+    /* IPP specific switches */
+    {"ipp_threading",    "Enables IPP scaling filter threading usage (ON by default)", OFFSET(ipp_threading),    AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
+    {"ipp_antialiasing", "Enables internal IPP anti-aliasing (OFF by default)",        OFFSET(ipp_antialiasing), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS},
+
+    {"interpolation", "Interpolation algorithm used for resizing", OFFSET(interpolation), AV_OPT_TYPE_INT, {.i64 = IPPI_INTER_LINEAR}, 0, INT_MAX, FLAGS, "interpolation"},
+    //same as interpolation above, for compatibility with the swscale filter
+    {"flags",         "Interpolation algorithm used for resizing", OFFSET(interpolation), AV_OPT_TYPE_INT, {.i64 = IPPI_INTER_LINEAR}, 0, INT_MAX, FLAGS, "interpolation"},
+    {"nearest",  "nearest neighbour",                 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_NN},                 0, 0, FLAGS, "interpolation"},
+    {"linear",   "linear",                            0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LINEAR},             0, 0, FLAGS, "interpolation"},
+    {"bilinear", "linear by X and Y, same as linear", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LINEAR},             0, 0, FLAGS, "interpolation"},
+    {"cubic",    "cubic (B=0, C=1/2)",                0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_CUBIC2P_CATMULLROM}, 0, 0, FLAGS, "interpolation"},
+    {"super",    "supersampling",                     0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_SUPER},              0, 0, FLAGS, "interpolation"},
+    {"lanczos",  "Lanczos",                           0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LANCZOS},            0, 0, FLAGS, "interpolation"},
+    {NULL}};
+
+/* AVClass of the filter's private context; it exposes ippscale_options. */
+static const AVClass ippscale_class = {
+    .version    = LIBAVUTIL_VERSION_INT,
+    .class_name = "ippscale",
+    .option     = ippscale_options,
+    .item_name  = av_default_item_name,
+};
+
+/* Single video input; incoming frames are handled by ippscale_filter_frame(). */
+static const AVFilterPad ippscale_inputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .filter_frame = ippscale_filter_frame,
+    },
+    { NULL }
+};
+
+/* Single video output; the link is configured by ippscale_config_props(). */
+static const AVFilterPad ippscale_outputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .config_props = ippscale_config_props,
+    },
+    { NULL }
+};
+
+/* Filter definition of "scale_ipp". */
+AVFilter ff_vf_scale_ipp = {
+    .name          = "scale_ipp",
+    .description   = NULL_IF_CONFIG_SMALL(
+        "Intel Performance Primitives library based video scaling"),
+
+    /* private context */
+    .priv_size     = sizeof(IPPScaleContext),
+    .priv_class    = &ippscale_class,
+
+    /* callbacks */
+    .init_dict     = ippscale_init,
+    .uninit        = ippscale_uninit,
+    .query_formats = ippscale_query_formats,
+
+    /* pads and capabilities */
+    .inputs        = ippscale_inputs,
+    .outputs       = ippscale_outputs,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};