From patchwork Tue Feb 25 09:14:55 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 17926
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 98E65447FCA
	for <patchwork@ffaux-bg.ffmpeg.org>; Tue, 25 Feb 2020 11:24:24 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 80B4968B668;
	Tue, 25 Feb 2020 11:24:24 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga18.intel.com (mga18.intel.com [134.134.136.126])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 41039689923
 for <ffmpeg-devel@ffmpeg.org>; Tue, 25 Feb 2020 11:24:16 +0200 (EET)
X-Amp-Result: SKIPPED(no attachment in message)
X-Amp-File-Uploaded: False
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by orsmga106.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;
 25 Feb 2020 01:24:14 -0800
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.70,483,1574150400"; d="scan'208";a="350112894"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 25 Feb 2020 01:24:13 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Tue, 25 Feb 2020 17:14:55 +0800
Message-Id: <1582622095-6430-1-git-send-email-yejun.guo@intel.com>
X-Mailer: git-send-email 2.7.4
Subject: [FFmpeg-devel] [PATCH V2 2/3] avfilter/vf_dnn_processing.c: add
	planar yuv format support
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Only the Y channel is processed by the DNN model; the U and V channels
are copied to the output unchanged.

The command to use srcnn.pb (see vf_sr) looks like:
./ffmpeg -i 480p.jpg -vf format=yuv420p,scale=w=iw*2:h=ih*2,dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y -y srcnn.jpg

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 doc/filters.texi                |  9 ++++++
 libavfilter/vf_dnn_processing.c | 72 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index 8300aac..33b7857 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -9195,6 +9195,8 @@ Set the output name of the dnn network.
 
 @end table
 
+@subsection Examples
+
 @itemize
 @item
 Halve the red channle of the frame with format rgb24:
@@ -9208,6 +9210,12 @@ Halve the pixel value of the frame with format gray32f:
 ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native -y out.native.png
 @end example
 
+@item
+Process the Y channel with srcnn.pb (see the @ref{sr} filter) for a frame in yuv420p format (planar YUV formats are supported):
+@example
+./ffmpeg -i 480p.jpg -vf format=yuv420p,scale=w=iw*2:h=ih*2,dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y -y srcnn.jpg
+@end example
+
 @end itemize
 
 @section drawbox
@@ -17306,6 +17314,7 @@ Set quality level. The value @code{max} can be used to set the maximum level,
 currently @code{6}.
 @end table
 
+@anchor{sr}
 @section sr
 
 Scale the input by applying one of the super-resolution methods based on
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index 4d0ee78..f9458f0 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -110,6 +110,8 @@ static int query_formats(AVFilterContext *context)
     static const enum AVPixelFormat pix_fmts[] = {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
         AV_PIX_FMT_NONE
     };
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
@@ -163,6 +165,11 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin
         }
         return 0;
     case AV_PIX_FMT_GRAYF32:
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV410P:
+    case AV_PIX_FMT_YUV411P:
         if (model_input->channels != 1) {
             LOG_FORMAT_CHANNEL_MISMATCH();
             return AVERROR(EIO);
@@ -246,6 +253,28 @@ static int prepare_sws_context(AVFilterLink *outlink)
                                                        0, NULL, NULL, NULL);
         }
         return 0;
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV410P:
+    case AV_PIX_FMT_YUV411P:
+        av_assert0(input_dt == DNN_FLOAT);
+        av_assert0(output_dt == DNN_FLOAT);
+        ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w,
+                                                   inlink->h,
+                                                   AV_PIX_FMT_GRAY8,
+                                                   inlink->w,
+                                                   inlink->h,
+                                                   AV_PIX_FMT_GRAYF32,
+                                                   0, NULL, NULL, NULL);
+        ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w,
+                                                   outlink->h,
+                                                   AV_PIX_FMT_GRAYF32,
+                                                   outlink->w,
+                                                   outlink->h,
+                                                   AV_PIX_FMT_GRAY8,
+                                                   0, NULL, NULL, NULL);
+        return 0;
     default:
         //do nothing
         break;
@@ -300,6 +329,15 @@ static int copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *fram
                             frame->data[0], frame->linesize[0],
                             bytewidth, frame->height);
         return 0;
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV410P:
+    case AV_PIX_FMT_YUV411P:
+        sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize,
+                  0, frame->height, (uint8_t * const*)(&dnn_input->data),
+                  (const int [4]){frame->width * sizeof(float), 0, 0, 0});
+        return 0;
     default:
         return AVERROR(EIO);
     }
@@ -341,6 +379,15 @@ static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame)
                             dnn_output->data, bytewidth,
                             bytewidth, frame->height);
         return 0;
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV410P:
+    case AV_PIX_FMT_YUV411P:
+        sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
+                  (const int[4]){frame->width * sizeof(float), 0, 0, 0},
+                  0, frame->height, (uint8_t * const*)frame->data, frame->linesize);
+        return 0;
     default:
         return AVERROR(EIO);
     }
@@ -348,6 +395,27 @@ static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame)
     return 0;
 }
 
+static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) /* returns nonzero when pix_fmt is non-RGB and has exactly three components */
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    av_assert0(desc); /* a pixel format without a descriptor is treated as a programming error */
+    return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3; /* NOTE(review): no planar flag is tested here despite the name; presumably relies on the formats listed in query_formats - confirm */
+}
+
+static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in) /* copies planes 1 and 2 from in to out; ctx is not used; always returns 0 */
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(in->format); /* NOTE(review): desc is dereferenced below without a NULL check */
+    int uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h); /* chroma row count: frame height shifted right by log2_chroma_h, rounded up */
+    for (int i = 1; i < 3; ++i) { /* planes 1 and 2 only; plane 0 is not touched by this function */
+        int bytewidth = av_image_get_linesize(in->format, in->width, i); /* bytes per row of plane i at the input width; NOTE(review): result is not checked before use */
+        av_image_copy_plane(out->data[i], out->linesize[i],
+                            in->data[i], in->linesize[i],
+                            bytewidth, uv_height); /* extents come from in only; NOTE(review): presumes out has the same dimensions as in - confirm */
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *context  = inlink->dst;
@@ -373,6 +441,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
     av_frame_copy_props(out, in);
     copy_from_dnn_to_frame(ctx, out);
+
+    if (isPlanarYUV(in->format))
+        copy_uv_planes(ctx, out, in);
+
     av_frame_free(&in);
     return ff_filter_frame(outlink, out);
 }