[FFmpeg-devel,V2,1/2] add support for ROI-based encoding

Submitted by Guo, Yejun on Dec. 25, 2018, 8:11 p.m.

Details

Message ID 1545768695-31176-1-git-send-email-yejun.guo@intel.com
State New
Headers show

Commit Message

Guo, Yejun Dec. 25, 2018, 8:11 p.m.
This patchset contains two patches.
- the first patch (this patch) finished the code and ask for upstream.
- the second patch is just a quick example on how to generate ROI info.

The encoders such as libx264 support different QPs offset for different MBs,
it makes possible for ROI-based encoding. It makes sense to add support
within ffmpeg to generate/accept ROI infos and pass into encoders.

Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code
generates ROI info for that frame, and the encoder finally does the
ROI-based encoding.

This patch just enabled the path from ffmpeg to libx264, the more encoders
can be added later.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavcodec/libx264.c | 40 ++++++++++++++++++++++++++++++++++++++++
 libavutil/frame.c    |  1 +
 libavutil/frame.h    | 19 +++++++++++++++++++
 3 files changed, 60 insertions(+)

Comments

Michael Niedermayer Dec. 26, 2018, 4:43 p.m.
On Wed, Dec 26, 2018 at 04:11:35AM +0800, Guo, Yejun wrote:
> This patchset contains two patches.
> - the first patch (this patch) finished the code and ask for upstream.
> - the second patch is just a quick example on how to generate ROI info.
> 
> The encoders such as libx264 support different QPs offset for different MBs,
> it makes possible for ROI-based encoding. It makes sense to add support
> within ffmpeg to generate/accept ROI infos and pass into encoders.
> 
> Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code
> generates ROI info for that frame, and the encoder finally does the
> ROI-based encoding.
> 
> This patch just enabled the path from ffmpeg to libx264, the more encoders
> can be added later.
> 
> Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> ---
>  libavcodec/libx264.c | 40 ++++++++++++++++++++++++++++++++++++++++
>  libavutil/frame.c    |  1 +
>  libavutil/frame.h    | 19 +++++++++++++++++++
>  3 files changed, 60 insertions(+)

The commit message talks about 2 patches but this commit is one patch


> 
> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
> index a68d0a7..a4f8677 100644
> --- a/libavcodec/libx264.c
> +++ b/libavcodec/libx264.c
> @@ -40,6 +40,10 @@
>  #include <stdlib.h>
>  #include <string.h>
>  
> +// from x264.h, for quant_offsets, Macroblocks are 16x16
> +// blocks of pixels (with respect to the luma plane)
> +#define MB_SIZE 16
> +
>  typedef struct X264Context {
>      AVClass        *class;
>      x264_param_t    params;
> @@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
>                  }
>              }
>          }
> +
> +        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ROIS);
> +        if (sd != NULL) {
> +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
> +                av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
> +            } else {
> +                if (frame->interlaced_frame == 0) {
> +                    size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
> +                    size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
> +                    float* qoffsets;
> +                    qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby);
> +                    if (qoffsets == NULL)
> +                        return AVERROR(ENOMEM);
> +                    memset(qoffsets, 0, sizeof(*qoffsets) * mbx * mby);

av_mallocz_array()



> +
> +                    size_t nb_rois = sd->size / sizeof(AVROI);
> +                    AVROI* rois = (AVROI*)sd->data;
> +                    for (size_t roi = 0; roi < nb_rois; roi++) {
> +                        int starty = FFMIN(mby, rois[roi].top / MB_SIZE);
> +                        int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ MB_SIZE);
> +                        int startx = FFMIN(mbx, rois[roi].left / MB_SIZE);
> +                        int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ MB_SIZE);
> +                        for (int y = starty; y < endy; y++) {
> +                            for (int x = startx; x < endx; x++) {
> +                                qoffsets[x + y*mbx] = rois[roi].qoffset;
> +                            }
> +                        }
> +                    }
> +
> +                    x4->pic.prop.quant_offsets = qoffsets;
> +                    x4->pic.prop.quant_offsets_free = av_free;
> +                } else {
> +                    av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
> +                }
> +            }
> +        }
>      }
>  
>      do {
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index 34a6210..bebc50e 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
>      case AV_FRAME_DATA_QP_TABLE_DATA:               return "QP table data";
>  #endif
>      case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)";
> +    case AV_FRAME_DATA_ROIS: return "Regions Of Interest";
>      }
>      return NULL;
>  }
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index 582ac47..d18d235 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -173,6 +173,12 @@ enum AVFrameSideDataType {
>       * volume transform - application 4 of SMPTE 2094-40:2016 standard.
>       */
>      AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
> +
> +    /**
> +     * Regions Of Interest, the number of ROI area is implied
> +     * in the size of buf.
> +     */
> +    AV_FRAME_DATA_ROIS,
>  };

the API addition to libavutil should be in a seperate patch from the
use of the API, it also must bump the version

The text describing AV_FRAME_DATA_ROIS also is inadequate to use it
it doesnt really describe what this side data looks like exactly

[...]
Guo, Yejun Dec. 27, 2018, 1:38 a.m.
> -----Original Message-----
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf
> Of Michael Niedermayer
> Sent: Thursday, December 27, 2018 12:44 AM
> To: FFmpeg development discussions and patches <ffmpeg-
> devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH V2 1/2] add support for ROI-based
> encoding
> 
> On Wed, Dec 26, 2018 at 04:11:35AM +0800, Guo, Yejun wrote:
> > This patchset contains two patches.
> > - the first patch (this patch) finished the code and ask for upstream.
> > - the second patch is just a quick example on how to generate ROI info.
> >
> > The encoders such as libx264 support different QPs offset for
> > different MBs, it makes possible for ROI-based encoding. It makes
> > sense to add support within ffmpeg to generate/accept ROI infos and pass
> into encoders.
> >
> > Typical usage: After AVFrame is decoded, a ffmpeg filter or user's
> > code generates ROI info for that frame, and the encoder finally does
> > the ROI-based encoding.
> >
> > This patch just enabled the path from ffmpeg to libx264, the more
> > encoders can be added later.
> >
> > Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> > ---
> >  libavcodec/libx264.c | 40
> ++++++++++++++++++++++++++++++++++++++++
> >  libavutil/frame.c    |  1 +
> >  libavutil/frame.h    | 19 +++++++++++++++++++
> >  3 files changed, 60 insertions(+)
> 
> The commit message talks about 2 patches but this commit is one patch
> 

This patch is:  [FFmpeg-devel] [PATCH V2 1/2] add support for ROI-based encoding, see http://ffmpeg.org/pipermail/ffmpeg-devel/2018-December/238135.html, 
and the other patch is:  [FFmpeg-devel] [PATCH V2 2/2] add an example to show how to fill	the ROI info , see http://ffmpeg.org/pipermail/ffmpeg-devel/2018-December/238136.html. 

I sent out the two patches almost at the same time.

Thanks for pointing out the possible confusion, to make it clear, I will remove the relative descriptions in this patch, and keep the relative descriptions in the last patch (the patch that just for a reference, not for upstream).

> 
> >
> > diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index
> > a68d0a7..a4f8677 100644
> > --- a/libavcodec/libx264.c
> > +++ b/libavcodec/libx264.c
> > @@ -40,6 +40,10 @@
> >  #include <stdlib.h>
> >  #include <string.h>
> >
> > +// from x264.h, for quant_offsets, Macroblocks are 16x16 // blocks of
> > +pixels (with respect to the luma plane) #define MB_SIZE 16
> > +
> >  typedef struct X264Context {
> >      AVClass        *class;
> >      x264_param_t    params;
> > @@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx,
> AVPacket *pkt, const AVFrame *frame,
> >                  }
> >              }
> >          }
> > +
> > +        AVFrameSideData *sd = av_frame_get_side_data(frame,
> AV_FRAME_DATA_ROIS);
> > +        if (sd != NULL) {
> > +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
> > +                av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be
> enabled to use ROI encoding, skipping ROI.\n");
> > +            } else {
> > +                if (frame->interlaced_frame == 0) {
> > +                    size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
> > +                    size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
> > +                    float* qoffsets;
> > +                    qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby);
> > +                    if (qoffsets == NULL)
> > +                        return AVERROR(ENOMEM);
> > +                    memset(qoffsets, 0, sizeof(*qoffsets) * mbx *
> > + mby);
> 
> av_mallocz_array()
> 

thanks, will change to it.

> 
> 
> > +
> > +                    size_t nb_rois = sd->size / sizeof(AVROI);
> > +                    AVROI* rois = (AVROI*)sd->data;
> > +                    for (size_t roi = 0; roi < nb_rois; roi++) {
> > +                        int starty = FFMIN(mby, rois[roi].top / MB_SIZE);
> > +                        int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/
> MB_SIZE);
> > +                        int startx = FFMIN(mbx, rois[roi].left / MB_SIZE);
> > +                        int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/
> MB_SIZE);
> > +                        for (int y = starty; y < endy; y++) {
> > +                            for (int x = startx; x < endx; x++) {
> > +                                qoffsets[x + y*mbx] = rois[roi].qoffset;
> > +                            }
> > +                        }
> > +                    }
> > +
> > +                    x4->pic.prop.quant_offsets = qoffsets;
> > +                    x4->pic.prop.quant_offsets_free = av_free;
> > +                } else {
> > +                    av_log(ctx, AV_LOG_WARNING, "interlaced_frame not
> supported for ROI encoding yet, skipping ROI.\n");
> > +                }
> > +            }
> > +        }
> >      }
> >
> >      do {
> > diff --git a/libavutil/frame.c b/libavutil/frame.c index
> > 34a6210..bebc50e 100644
> > --- a/libavutil/frame.c
> > +++ b/libavutil/frame.c
> > @@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum
> AVFrameSideDataType type)
> >      case AV_FRAME_DATA_QP_TABLE_DATA:               return "QP table data";
> >  #endif
> >      case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic
> Metadata
> > SMPTE2094-40 (HDR10+)";
> > +    case AV_FRAME_DATA_ROIS: return "Regions Of Interest";
> >      }
> >      return NULL;
> >  }
> > diff --git a/libavutil/frame.h b/libavutil/frame.h index
> > 582ac47..d18d235 100644
> > --- a/libavutil/frame.h
> > +++ b/libavutil/frame.h
> > @@ -173,6 +173,12 @@ enum AVFrameSideDataType {
> >       * volume transform - application 4 of SMPTE 2094-40:2016 standard.
> >       */
> >      AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
> > +
> > +    /**
> > +     * Regions Of Interest, the number of ROI area is implied
> > +     * in the size of buf.
> > +     */
> > +    AV_FRAME_DATA_ROIS,
> >  };
> 
> the API addition to libavutil should be in a seperate patch from the use of the
> API, it also must bump the version

thanks, will separate the patch and bump the libavutil  version.

> 
> The text describing AV_FRAME_DATA_ROIS also is inadequate to use it it
> doesnt really describe what this side data looks like exactly

thanks, will fix it.

> 
> [...]
> --
> Michael     GnuPG fingerprint:
> 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> If you fake or manipulate statistics in a paper in physics you will never get a
> job again.
> If you fake or manipulate statistics in a paper in medicin you will get a job for
> life at the pharma industry.

Patch hide | download patch | download mbox

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a68d0a7..a4f8677 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -40,6 +40,10 @@ 
 #include <stdlib.h>
 #include <string.h>
 
+// from x264.h, for quant_offsets, Macroblocks are 16x16
+// blocks of pixels (with respect to the luma plane)
+#define MB_SIZE 16
+
 typedef struct X264Context {
     AVClass        *class;
     x264_param_t    params;
@@ -345,6 +349,42 @@  static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                 }
             }
         }
+
+        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ROIS);
+        if (sd != NULL) {
+            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
+                av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+            } else {
+                if (frame->interlaced_frame == 0) {
+                    size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
+                    size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
+                    float* qoffsets;
+                    qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby);
+                    if (qoffsets == NULL)
+                        return AVERROR(ENOMEM);
+                    memset(qoffsets, 0, sizeof(*qoffsets) * mbx * mby);
+
+                    size_t nb_rois = sd->size / sizeof(AVROI);
+                    AVROI* rois = (AVROI*)sd->data;
+                    for (size_t roi = 0; roi < nb_rois; roi++) {
+                        int starty = FFMIN(mby, rois[roi].top / MB_SIZE);
+                        int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ MB_SIZE);
+                        int startx = FFMIN(mbx, rois[roi].left / MB_SIZE);
+                        int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ MB_SIZE);
+                        for (int y = starty; y < endy; y++) {
+                            for (int x = startx; x < endx; x++) {
+                                qoffsets[x + y*mbx] = rois[roi].qoffset;
+                            }
+                        }
+                    }
+
+                    x4->pic.prop.quant_offsets = qoffsets;
+                    x4->pic.prop.quant_offsets_free = av_free;
+                } else {
+                    av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
+                }
+            }
+        }
     }
 
     do {
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 34a6210..bebc50e 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -841,6 +841,7 @@  const char *av_frame_side_data_name(enum AVFrameSideDataType type)
     case AV_FRAME_DATA_QP_TABLE_DATA:               return "QP table data";
 #endif
     case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)";
+    case AV_FRAME_DATA_ROIS: return "Regions Of Interest";
     }
     return NULL;
 }
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 582ac47..d18d235 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -173,6 +173,12 @@  enum AVFrameSideDataType {
      * volume transform - application 4 of SMPTE 2094-40:2016 standard.
      */
     AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
+
+    /**
+     * Regions Of Interest, the number of ROI area is implied
+     * in the size of buf.
+     */
+    AV_FRAME_DATA_ROIS,
 };
 
 enum AVActiveFormatDescription {
@@ -200,6 +206,19 @@  typedef struct AVFrameSideData {
     AVBufferRef *buf;
 } AVFrameSideData;
 
+typedef struct AVROI {
+    /* coordinates at frame pixel level.
+     * It will be extended internally if the codec requires an alignment.
+     * If the regions overlap, the last value in the list will be used.
+     */
+    size_t top;
+    size_t bottom;
+    size_t left;
+    size_t right;
+    // quant offset is encoder dependent
+    int qoffset;
+} AVROI;
+
 /**
  * This structure describes decoded (raw) audio or video data.
  *