From patchwork Fri Dec 27 08:34:20 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 16990 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 057D744B047 for ; Fri, 27 Dec 2019 10:42:19 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id E41CD6880E8; Fri, 27 Dec 2019 10:42:18 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id D970A68809B for ; Fri, 27 Dec 2019 10:42:11 +0200 (EET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 27 Dec 2019 00:42:09 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.69,362,1571727600"; d="scan'208";a="269031857" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.13.25]) by FMSMGA003.fm.intel.com with ESMTP; 27 Dec 2019 00:42:08 -0800 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Fri, 27 Dec 2019 16:34:20 +0800 Message-Id: <1577435660-11904-1-git-send-email-yejun.guo@intel.com> X-Mailer: git-send-email 2.7.4 Subject: [FFmpeg-devel] [PATCH 2/2] vf_dnn_processing: add support for more formats gray8 and grayf32 X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" The following is a python script 
to halve the value of the gray image. It demonstrates how to set up and execute a DNN model with Python and TensorFlow. It also generates the .pb file which will be used by ffmpeg. import tensorflow as tf import numpy as np from skimage import color from skimage import io in_img = io.imread('input.jpg') in_img = color.rgb2gray(in_img) io.imsave('ori_gray.jpg', np.squeeze(in_img)) in_data = np.expand_dims(in_img, axis=0) in_data = np.expand_dims(in_data, axis=3) filter_data = np.array([0.5]).reshape(1,1,1,1).astype(np.float32) filter = tf.Variable(filter_data) x = tf.placeholder(tf.float32, shape=[1, None, None, 1], name='dnn_in') y = tf.nn.conv2d(x, filter, strides=[1, 1, 1, 1], padding='VALID', name='dnn_out') sess=tf.Session() sess.run(tf.global_variables_initializer()) graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out']) tf.train.write_graph(graph_def, '.', 'halve_gray_float.pb', as_text=False) print("halve_gray_float.pb generated, please use \ path_to_ffmpeg/tools/python/convert.py to generate halve_gray_float.model\n") output = sess.run(y, feed_dict={x: in_data}) output = output * 255.0 output = output.astype(np.uint8) io.imsave("out.jpg", np.squeeze(output)) To do the same thing with ffmpeg: - generate halve_gray_float.pb with the above script - generate halve_gray_float.model with tools/python/convert.py - try with the following commands ./ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native out.native.png ./ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.pb:input=dnn_in:output=dnn_out:dnn_backend=tensorflow out.tf.png Signed-off-by: Guo, Yejun --- doc/filters.texi | 6 ++ libavfilter/vf_dnn_processing.c | 168 ++++++++++++++++++++++++++++++---------- 2 files changed, 132 insertions(+), 42 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index f467378..57a129d 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -9075,6 
+9075,12 @@ Halve the red channle of the frame with format rgb24: ffmpeg -i input.jpg -vf format=rgb24,dnn_processing=model=halve_first_channel.model:input=dnn_in:output=dnn_out:dnn_backend=native out.native.png @end example +@item +Halve the pixel value of the frame with format grayf32: +@example +ffmpeg -i input.jpg -vf format=grayf32,dnn_processing=model=halve_gray_float.model:input=dnn_in:output=dnn_out:dnn_backend=native -y out.native.png +@end example + @end itemize @section drawbox diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index 4a6b900..13273f2 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -104,12 +104,20 @@ static int query_formats(AVFilterContext *context) { static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32, AV_PIX_FMT_NONE }; AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); return ff_set_common_formats(context, fmts_list); } +#define LOG_FORMAT_CHANNEL_MISMATCH() \ + av_log(ctx, AV_LOG_ERROR, \ + "the frame's format %s does not match " \ + "the model input channel %d\n", \ + av_get_pix_fmt_name(fmt), \ + model_input->channels); + static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink) { AVFilterContext *ctx = inlink->dst; @@ -131,17 +139,34 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin case AV_PIX_FMT_RGB24: case AV_PIX_FMT_BGR24: if (model_input->channels != 3) { - av_log(ctx, AV_LOG_ERROR, "the frame's input format %s does not match " - "the model input channels %d\n", - av_get_pix_fmt_name(fmt), - model_input->channels); + LOG_FORMAT_CHANNEL_MISMATCH(); return AVERROR(EIO); } if (model_input->dt != DNN_FLOAT && model_input->dt != DNN_UINT8) { av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32 and uint8.\n"); return AVERROR(EIO); } - break; + return 0; + case AV_PIX_FMT_GRAY8: + if 
(model_input->channels != 1) { + LOG_FORMAT_CHANNEL_MISMATCH(); + return AVERROR(EIO); + } + if (model_input->dt != DNN_UINT8) { + av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type uint8.\n"); + return AVERROR(EIO); + } + return 0; + case AV_PIX_FMT_GRAYF32: + if (model_input->channels != 1) { + LOG_FORMAT_CHANNEL_MISMATCH(); + return AVERROR(EIO); + } + if (model_input->dt != DNN_FLOAT) { + av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type float32.\n"); + return AVERROR(EIO); + } + return 0; default: av_log(ctx, AV_LOG_ERROR, "%s not supported.\n", av_get_pix_fmt_name(fmt)); return AVERROR(EIO); @@ -206,28 +231,58 @@ static int config_output(AVFilterLink *outlink) static int copy_from_frame_to_dnn(DNNData *dnn_input, const AVFrame *frame) { - // extend this function to support more formats - av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24); - - if (dnn_input->dt == DNN_FLOAT) { - float *dnn_input_data = dnn_input->data; - for (int i = 0; i < frame->height; i++) { - for(int j = 0; j < frame->width * 3; j++) { - int k = i * frame->linesize[0] + j; - int t = i * frame->width * 3 + j; - dnn_input_data[t] = frame->data[0][k] / 255.0f; + switch (frame->format) { + case AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: + if (dnn_input->dt == DNN_FLOAT) { + float *dnn_input_data = dnn_input->data; + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width * 3; j++) { + int k = i * frame->linesize[0] + j; + int t = i * frame->width * 3 + j; + dnn_input_data[t] = frame->data[0][k] / 255.0f; + } + } + } else { + uint8_t *dnn_input_data = dnn_input->data; + av_assert0(dnn_input->dt == DNN_UINT8); + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width * 3; j++) { + int k = i * frame->linesize[0] + j; + int t = i * frame->width * 3 + j; + dnn_input_data[t] = frame->data[0][k]; + } } } - } else { - uint8_t *dnn_input_data = dnn_input->data; - av_assert0(dnn_input->dt 
== DNN_UINT8); - for (int i = 0; i < frame->height; i++) { - for(int j = 0; j < frame->width * 3; j++) { - int k = i * frame->linesize[0] + j; - int t = i * frame->width * 3 + j; - dnn_input_data[t] = frame->data[0][k]; + return 0; + case AV_PIX_FMT_GRAY8: + { + uint8_t *dnn_input_data = dnn_input->data; + av_assert0(dnn_input->dt == DNN_UINT8); + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width; j++) { + int k = i * frame->linesize[0] + j; + int t = i * frame->width + j; + dnn_input_data[t] = frame->data[0][k]; + } } } + return 0; + case AV_PIX_FMT_GRAYF32: + { + float *dnn_input_data = dnn_input->data; + av_assert0(dnn_input->dt == DNN_FLOAT); + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width; j++) { + int k = i * frame->linesize[0] + j * sizeof(float); + int t = i * frame->width + j; + dnn_input_data[t] = *(float*)(frame->data[0] + k); + } + } + } + return 0; + default: + return AVERROR(EIO); } return 0; @@ -235,28 +290,58 @@ static int copy_from_frame_to_dnn(DNNData *dnn_input, const AVFrame *frame) static int copy_from_dnn_to_frame(AVFrame *frame, const DNNData *dnn_output) { - // extend this function to support more formats - av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24); - - if (dnn_output->dt == DNN_FLOAT) { - float *dnn_output_data = dnn_output->data; - for (int i = 0; i < frame->height; i++) { - for(int j = 0; j < frame->width * 3; j++) { - int k = i * frame->linesize[0] + j; - int t = i * frame->width * 3 + j; - frame->data[0][k] = av_clip_uintp2((int)(dnn_output_data[t] * 255.0f), 8); + switch (frame->format) { + case AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: + if (dnn_output->dt == DNN_FLOAT) { + float *dnn_output_data = dnn_output->data; + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width * 3; j++) { + int k = i * frame->linesize[0] + j; + int t = i * frame->width * 3 + j; + frame->data[0][k] = av_clip_uintp2((int)(dnn_output_data[t] 
* 255.0f), 8); + } + } + } else { + uint8_t *dnn_output_data = dnn_output->data; + av_assert0(dnn_output->dt == DNN_UINT8); + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width * 3; j++) { + int k = i * frame->linesize[0] + j; + int t = i * frame->width * 3 + j; + frame->data[0][k] = dnn_output_data[t]; + } + } + } + return 0; + case AV_PIX_FMT_GRAY8: + { + uint8_t *dnn_output_data = dnn_output->data; + av_assert0(dnn_output->dt == DNN_UINT8); + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width; j++) { + int k = i * frame->linesize[0] + j; + int t = i * frame->width + j; + frame->data[0][k] = dnn_output_data[t]; + } } } - } else { - uint8_t *dnn_output_data = dnn_output->data; - av_assert0(dnn_output->dt == DNN_UINT8); - for (int i = 0; i < frame->height; i++) { - for(int j = 0; j < frame->width * 3; j++) { - int k = i * frame->linesize[0] + j; - int t = i * frame->width * 3 + j; - frame->data[0][k] = dnn_output_data[t]; + return 0; + case AV_PIX_FMT_GRAYF32: + { + float *dnn_output_data = dnn_output->data; + av_assert0(dnn_output->dt == DNN_FLOAT); + for (int i = 0; i < frame->height; i++) { + for(int j = 0; j < frame->width; j++) { + int k = i * frame->linesize[0] + j * sizeof(float); + int t = i * frame->width + j; + *(float*)(frame->data[0] + k) = dnn_output_data[t]; + } } } + return 0; + default: + return AVERROR(EIO); } return 0; @@ -278,7 +363,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) av_frame_free(&in); return AVERROR(EIO); } - av_assert0(ctx->output.channels == 3); out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) {