From patchwork Tue Feb 25 09:14:55 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 17926 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 98E65447FCA for ; Tue, 25 Feb 2020 11:24:24 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 80B4968B668; Tue, 25 Feb 2020 11:24:24 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 41039689923 for ; Tue, 25 Feb 2020 11:24:16 +0200 (EET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 25 Feb 2020 01:24:14 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,483,1574150400"; d="scan'208";a="350112894" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by fmsmga001.fm.intel.com with ESMTP; 25 Feb 2020 01:24:13 -0800 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Tue, 25 Feb 2020 17:14:55 +0800 Message-Id: <1582622095-6430-1-git-send-email-yejun.guo@intel.com> X-Mailer: git-send-email 2.7.4 Subject: [FFmpeg-devel] [PATCH V2 2/3] avfilter/vf_dnn_processing.c: add planar yuv format support X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Only the Y channel is handled by 
dnn; the UV channels are copied unchanged. The command to use srcnn.pb (see vf_sr) looks like: ./ffmpeg -i 480p.jpg -vf format=yuv420p,scale=w=iw*2:h=ih*2,dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y -y srcnn.jpg Signed-off-by: Guo, Yejun --- doc/filters.texi | 9 ++++++ libavfilter/vf_dnn_processing.c | 72 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index 8300aac..33b7857 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -9195,6 +9195,8 @@ Set the output name of the dnn network. @end table +@subsection Examples + @itemize @item Halve the red channle of the frame with format rgb24: @@ -9208,6 +9210,12 @@ Halve the pixel value of the frame with format gray32f: ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native -y out.native.png @end example +@item +Handle the Y channel with srcnn.pb (see @ref{sr} filter) for a frame in yuv420p format (planar YUV formats are supported): +@example +./ffmpeg -i 480p.jpg -vf format=yuv420p,scale=w=iw*2:h=ih*2,dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y -y srcnn.jpg +@end example + @end itemize @section drawbox @@ -17306,6 +17314,7 @@ Set quality level. The value @code{max} can be used to set the maximum level, currently @code{6}. 
@end table +@anchor{sr} @section sr Scale the input by applying one of the super-resolution methods based on diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index 4d0ee78..f9458f0 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -110,6 +110,8 @@ static int query_formats(AVFilterContext *context) static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32, + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, + AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_NONE }; AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); @@ -163,6 +165,11 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin } return 0; case AV_PIX_FMT_GRAYF32: + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV410P: + case AV_PIX_FMT_YUV411P: if (model_input->channels != 1) { LOG_FORMAT_CHANNEL_MISMATCH(); return AVERROR(EIO); @@ -246,6 +253,28 @@ static int prepare_sws_context(AVFilterLink *outlink) 0, NULL, NULL, NULL); } return 0; + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV410P: + case AV_PIX_FMT_YUV411P: + av_assert0(input_dt == DNN_FLOAT); + av_assert0(output_dt == DNN_FLOAT); + ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w, + inlink->h, + AV_PIX_FMT_GRAY8, + inlink->w, + inlink->h, + AV_PIX_FMT_GRAYF32, + 0, NULL, NULL, NULL); + ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w, + outlink->h, + AV_PIX_FMT_GRAYF32, + outlink->w, + outlink->h, + AV_PIX_FMT_GRAY8, + 0, NULL, NULL, NULL); + return 0; default: //do nothing break; @@ -300,6 +329,15 @@ static int copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *fram frame->data[0], frame->linesize[0], bytewidth, frame->height); return 0; + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV410P: + 
case AV_PIX_FMT_YUV411P: + sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize, + 0, frame->height, (uint8_t * const*)(&dnn_input->data), + (const int [4]){frame->width * sizeof(float), 0, 0, 0}); + return 0; default: return AVERROR(EIO); } @@ -341,6 +379,15 @@ static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame) dnn_output->data, bytewidth, bytewidth, frame->height); return 0; + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV410P: + case AV_PIX_FMT_YUV411P: + sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0}, + (const int[4]){frame->width * sizeof(float), 0, 0, 0}, + 0, frame->height, (uint8_t * const*)frame->data, frame->linesize); + return 0; default: return AVERROR(EIO); } @@ -348,6 +395,27 @@ static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame) return 0; } +static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + av_assert0(desc); + return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3; +} + +static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(in->format); + int uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h); + for (int i = 1; i < 3; ++i) { + int bytewidth = av_image_get_linesize(in->format, in->width, i); + av_image_copy_plane(out->data[i], out->linesize[i], + in->data[i], in->linesize[i], + bytewidth, uv_height); + } + + return 0; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *context = inlink->dst; @@ -373,6 +441,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) av_frame_copy_props(out, in); copy_from_dnn_to_frame(ctx, out); + + if (isPlanarYUV(in->format)) + copy_uv_planes(ctx, out, in); + av_frame_free(&in); return 
ff_filter_frame(outlink, out); }