diff mbox

[FFmpeg-devel,1/2] add support for ROI-based encoding

Message ID 1544631974-23017-1-git-send-email-yejun.guo@intel.com
State Superseded
Headers show

Commit Message

Guo, Yejun Dec. 12, 2018, 4:26 p.m. UTC
This patchset contains two patches.
- the first patch (this patch) finished the code and ask for upstream.
- the second patch is just a quick example on how to generate ROI info.

The encoders such as libx264 support different QPs offset for different MBs,
it makes possible for ROI-based encoding. It makes sense to add support
within ffmpeg to generate/accept ROI infos and pass into encoders.

Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code
generates ROI info for that frame, and the encoder finally does the
ROI-based encoding. And so I choose to maintain the ROI info (AVFrameROI)
within AVFrame struct.

Since the ROI info generator might more focus on the domain knowledge of
the interest regions, instead of the encoding detail, the AVFrameROI is
designed to be more friend for ffmpeg users.

This patch just enabled the path from ffmpeg to libx264, the more encoders
can be added later.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavcodec/libx264.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 libavutil/frame.c    |  8 ++++++++
 libavutil/frame.h    | 24 ++++++++++++++++++++++
 3 files changed, 88 insertions(+)

Comments

Guo, Yejun Dec. 20, 2018, 6:58 a.m. UTC | #1
aks for review, thanks.

> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Guo, Yejun

> Sent: Thursday, December 13, 2018 12:26 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: [FFmpeg-devel] [PATCH 1/2] add support for ROI-based encoding

> 

> This patchset contains two patches.

> - the first patch (this patch) finished the code and ask for upstream.

> - the second patch is just a quick example on how to generate ROI info.

> 

> The encoders such as libx264 support different QPs offset for different MBs,

> it makes possible for ROI-based encoding. It makes sense to add support

> within ffmpeg to generate/accept ROI infos and pass into encoders.

> 

> Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code

> generates ROI info for that frame, and the encoder finally does the ROI-

> based encoding. And so I choose to maintain the ROI info (AVFrameROI)

> within AVFrame struct.

> 

> Since the ROI info generator might more focus on the domain knowledge of

> the interest regions, instead of the encoding detail, the AVFrameROI is

> designed to be more friend for ffmpeg users.

> 

> This patch just enabled the path from ffmpeg to libx264, the more encoders

> can be added later.

> 

> Signed-off-by: Guo, Yejun <yejun.guo@intel.com>

> ---

>  libavcodec/libx264.c | 56

> ++++++++++++++++++++++++++++++++++++++++++++++++++++

>  libavutil/frame.c    |  8 ++++++++

>  libavutil/frame.h    | 24 ++++++++++++++++++++++

>  3 files changed, 88 insertions(+)

> 

> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index

> a68d0a7..e4e593f 100644

> --- a/libavcodec/libx264.c

> +++ b/libavcodec/libx264.c

> @@ -273,6 +273,29 @@ static void reconfig_encoder(AVCodecContext *ctx,

> const AVFrame *frame)

>      }

>  }

> 

> +static float get_roi_qoffset(AVCodecContext *ctx, enum AVRoiQuality q)

> +{

> +    // the returned value can be refined with more consideration.

> +    float qoffset = 0.0f;

> +    switch (q)

> +    {

> +    case AV_RQ_NONE:

> +        qoffset = 0.0f;

> +        break;

> +    case AV_RQ_BETTER:

> +        qoffset = -8.0f;

> +        break;

> +    case AV_RQ_BEST:

> +        qoffset = -16.0f;

> +        break;

> +    default:

> +        av_log(ctx, AV_LOG_ERROR, "unknown value of AVRoiQuality.\n");

> +        break;

> +    }

> +

> +    return qoffset;

> +}

> +

>  static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame

> *frame,

>                        int *got_packet)

>  {

> @@ -345,6 +368,39 @@ static int X264_frame(AVCodecContext *ctx,

> AVPacket *pkt, const AVFrame *frame,

>                  }

>              }

>          }

> +

> +        if (frame->rois_buf != NULL) {

> +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {

> +                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization must be

> enabled to use ROI encoding, skipping ROI.\n");

> +            } else {

> +                if (frame->interlaced_frame == 0) {

> +                    const static int MBSIZE = 16;

> +                    size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;

> +                    size_t mby = (frame->height + MBSIZE - 1) / MBSIZE;

> +                    float* qoffsets = (float*)av_malloc(sizeof(float) * mbx * mby);

> +                    memset(qoffsets, 0, sizeof(float) * mbx * mby);

> +

> +                    size_t nb_rois = frame->rois_buf->size / sizeof(AVFrameROI);

> +                    AVFrameROI* rois = (AVFrameROI*)frame->rois_buf->data;

> +                    for (size_t roi = 0; roi < nb_rois; ++roi) {

> +                        int starty = FFMIN(mby, rois[roi].top / MBSIZE);

> +                        int endy = FFMIN(mby, (rois[roi].bottom + MBSIZE - 1)/ MBSIZE);

> +                        int startx = FFMIN(mbx, rois[roi].left / MBSIZE);

> +                        int endx = FFMIN(mbx, (rois[roi].right + MBSIZE - 1)/ MBSIZE);

> +                        for (int y = starty; y < endy; ++y) {

> +                            for (int x = startx; x < endx; ++x) {

> +                                qoffsets[x + y*mbx] = get_roi_qoffset(ctx, rois[roi].quality);

> +                            }

> +                        }

> +                    }

> +

> +                    x4->pic.prop.quant_offsets = qoffsets;

> +                    x4->pic.prop.quant_offsets_free = av_free;

> +                } else {

> +                    av_log(ctx, AV_LOG_ERROR, "interlaced_frame not supported for

> ROI encoding yet, skipping ROI.\n");

> +                }

> +            }

> +        }

>      }

> 

>      do {

> diff --git a/libavutil/frame.c b/libavutil/frame.c index 9b3fb13..dbc4b0a

> 100644

> --- a/libavutil/frame.c

> +++ b/libavutil/frame.c

> @@ -425,6 +425,13 @@ FF_DISABLE_DEPRECATION_WARNINGS

> FF_ENABLE_DEPRECATION_WARNINGS  #endif

> 

> +    av_buffer_unref(&dst->rois_buf);

> +    if (src->rois_buf) {

> +        dst->rois_buf = av_buffer_ref(src->rois_buf);

> +        if (!dst->rois_buf)

> +            return AVERROR(ENOMEM);

> +    }

> +

>      av_buffer_unref(&dst->opaque_ref);

>      av_buffer_unref(&dst->private_ref);

>      if (src->opaque_ref) {

> @@ -571,6 +578,7 @@ FF_DISABLE_DEPRECATION_WARNINGS

> FF_ENABLE_DEPRECATION_WARNINGS  #endif

> 

> +    av_buffer_unref(&frame->rois_buf);

>      av_buffer_unref(&frame->hw_frames_ctx);

> 

>      av_buffer_unref(&frame->opaque_ref);

> diff --git a/libavutil/frame.h b/libavutil/frame.h index 66f27f4..00d509d

> 100644

> --- a/libavutil/frame.h

> +++ b/libavutil/frame.h

> @@ -193,6 +193,23 @@ typedef struct AVFrameSideData {

>      AVBufferRef *buf;

>  } AVFrameSideData;

> 

> +enum AVRoiQuality {

> +    AV_RQ_NONE = 0,

> +    AV_RQ_BETTER = 1,

> +    AV_RQ_BEST = 2,

> +};

> +

> +typedef struct AVFrameROI {

> +    /* coordinates at frame pixel level.

> +     * it will be extended internally if the codec requirs an alignment

> +     */

> +    size_t top;

> +    size_t bottom;

> +    size_t left;

> +    size_t right;

> +    enum AVRoiQuality quality;

> +} AVFrameROI;

> +

>  /**

>   * This structure describes decoded (raw) audio or video data.

>   *

> @@ -556,6 +573,13 @@ typedef struct AVFrame {

>      attribute_deprecated

>      AVBufferRef *qp_table_buf;

>  #endif

> +

> +    /**

> +     * For ROI-based encoding, the number of ROI area is implied

> +     * in the size of buf.

> +     */

> +    AVBufferRef *rois_buf;

> +

>      /**

>       * For hwaccel-format frames, this should be a reference to the

>       * AVHWFramesContext describing the frame.

> --

> 2.7.4

> 

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Mark Thompson Dec. 20, 2018, 9:08 p.m. UTC | #2
On 12/12/2018 16:26, Guo, Yejun wrote:
> This patchset contains two patches.
> - the first patch (this patch) finished the code and ask for upstream.
> - the second patch is just a quick example on how to generate ROI info.
> 
> The encoders such as libx264 support different QPs offset for different MBs,
> it makes possible for ROI-based encoding. It makes sense to add support
> within ffmpeg to generate/accept ROI infos and pass into encoders.
> 
> Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code
> generates ROI info for that frame, and the encoder finally does the
> ROI-based encoding. And so I choose to maintain the ROI info (AVFrameROI)
> within AVFrame struct.
> 
> Since the ROI info generator might more focus on the domain knowledge of
> the interest regions, instead of the encoding detail, the AVFrameROI is
> designed to be more friend for ffmpeg users.
> 
> This patch just enabled the path from ffmpeg to libx264, the more encoders
> can be added later.
> 
> Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> ---
>  libavcodec/libx264.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/frame.c    |  8 ++++++++
>  libavutil/frame.h    | 24 ++++++++++++++++++++++
>  3 files changed, 88 insertions(+)
> 
> ...
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index 9b3fb13..dbc4b0a 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -425,6 +425,13 @@ FF_DISABLE_DEPRECATION_WARNINGS
>  FF_ENABLE_DEPRECATION_WARNINGS
>  #endif
>  
> +    av_buffer_unref(&dst->rois_buf);
> +    if (src->rois_buf) {
> +        dst->rois_buf = av_buffer_ref(src->rois_buf);
> +        if (!dst->rois_buf)
> +            return AVERROR(ENOMEM);
> +    }
> +
>      av_buffer_unref(&dst->opaque_ref);
>      av_buffer_unref(&dst->private_ref);
>      if (src->opaque_ref) {
> @@ -571,6 +578,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
>  FF_ENABLE_DEPRECATION_WARNINGS
>  #endif
>  
> +    av_buffer_unref(&frame->rois_buf);
>      av_buffer_unref(&frame->hw_frames_ctx);
>  
>      av_buffer_unref(&frame->opaque_ref);
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index 66f27f4..00d509d 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -193,6 +193,23 @@ typedef struct AVFrameSideData {
>      AVBufferRef *buf;
>  } AVFrameSideData;
>  
> +enum AVRoiQuality {
> +    AV_RQ_NONE = 0,
> +    AV_RQ_BETTER = 1,
> +    AV_RQ_BEST = 2,
> +};

I don't think I like this enum - an integer value without directly specifying this meaning would seem best for future flexibility?  Negative values might be meaningful.  A greater range would also allow ranking if there are many regions, which may be needed if some are discarded (because of hardware constraints, for example - VAAPI makes this visible).

> +
> +typedef struct AVFrameROI {
> +    /* coordinates at frame pixel level.
> +     * it will be extended internally if the codec requirs an alignment
> +     */

What is intended to happen if regions overlap?  (In the above you have it using the last value in the list.)

> +    size_t top;
> +    size_t bottom;
> +    size_t left;
> +    size_t right;
> +    enum AVRoiQuality quality;
> +} AVFrameROI;
> +
>  /**
>   * This structure describes decoded (raw) audio or video data.
>   *
> @@ -556,6 +573,13 @@ typedef struct AVFrame {
>      attribute_deprecated
>      AVBufferRef *qp_table_buf;
>  #endif
> +
> +    /**
> +     * For ROI-based encoding, the number of ROI area is implied
> +     * in the size of buf.
> +     */
> +    AVBufferRef *rois_buf;

This should be a new type of AVFrameSideData, not a new field in AVFrame.

> +
>      /**
>       * For hwaccel-format frames, this should be a reference to the
>       * AVHWFramesContext describing the frame.
> 

- Mark
Derek Buitenhuis Dec. 21, 2018, 4:36 p.m. UTC | #3
A few comments below.

On 12/12/2018 16:26, Guo, Yejun wrote:
> +        if (frame->rois_buf != NULL) {
> +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
> +                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");

This should, in my opinion, return an error and fail hard.

If people want it to continue anyway, it should be AV_LOG_WARNING.

> +            } else {
> +                if (frame->interlaced_frame == 0) {
> +                    const static int MBSIZE = 16;

I think we generally use defines for this stuff.

> +                    size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;
> +                    size_t mby = (frame->height + MBSIZE - 1) / MBSIZE;


> +                    float* qoffsets = (float*)av_malloc(sizeof(float) * mbx * mby);

Convention in FFmpeg is to use sizeof(*var).

> +                    memset(qoffsets, 0, sizeof(float) * mbx * mby);

Missing NULL check for alloc failure.

> +
> +                    size_t nb_rois = frame->rois_buf->size / sizeof(AVFrameROI);

I think we have some macros that do this already.

> +                    AVFrameROI* rois = (AVFrameROI*)frame->rois_buf->data;
> +                    for (size_t roi = 0; roi < nb_rois; ++roi) {

Nit/convention: roi++

> +                        int starty = FFMIN(mby, rois[roi].top / MBSIZE);
> +                        int endy = FFMIN(mby, (rois[roi].bottom + MBSIZE - 1)/ MBSIZE);
> +                        int startx = FFMIN(mbx, rois[roi].left / MBSIZE);
> +                        int endx = FFMIN(mbx, (rois[roi].right + MBSIZE - 1)/ MBSIZE);
> +                        for (int y = starty; y < endy; ++y) {
> +                            for (int x = startx; x < endx; ++x) {
> +                                qoffsets[x + y*mbx] = get_roi_qoffset(ctx, rois[roi].quality);
> +                            }
> +                        }
> +                    }
> +
> +                    x4->pic.prop.quant_offsets = qoffsets;
> +                    x4->pic.prop.quant_offsets_free = av_free;
> +                } else {
> +                    av_log(ctx, AV_LOG_ERROR, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");

Same comment as befor: return error or change to warning.



> +enum AVRoiQuality {

Probably should be AVROIQuality.

> +    AV_RQ_NONE = 0,
> +    AV_RQ_BETTER = 1,
> +    AV_RQ_BEST = 2,
> +};
> +
> +typedef struct AVFrameROI {
> +    /* coordinates at frame pixel level.
> +     * it will be extended internally if the codec requirs an alignment
> +     */
> +    size_t top;
> +    size_t bottom;
> +    size_t left;
> +    size_t right;
> +    enum AVRoiQuality quality;
> +} AVFrameROI;

Are we not going to allow the API to set an actual offset? This really
limits what someone can do. (I say this as a user of x264's ROI API, in my
own codebase, at least.)

Cheers,
- Derek
Carl Eugen Hoyos Dec. 21, 2018, 11:13 p.m. UTC | #4
2018-12-21 17:36 GMT+01:00, Derek Buitenhuis <derek.buitenhuis@gmail.com>:

> On 12/12/2018 16:26, Guo, Yejun wrote:
>> +        if (frame->rois_buf != NULL) {
>> +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
>> +                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization
>> must be enabled to use ROI encoding, skipping ROI.\n");
>
> This should, in my opinion, return an error and fail hard.

> If people want it to continue anyway, it should be AV_LOG_WARNING.

+1

Carl Eugen
Guo, Yejun Dec. 24, 2018, 8:37 a.m. UTC | #5
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Mark Thompson

> Sent: Friday, December 21, 2018 5:08 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH 1/2] add support for ROI-based

> encoding

> 

> On 12/12/2018 16:26, Guo, Yejun wrote:

> > This patchset contains two patches.

> > - the first patch (this patch) finished the code and ask for upstream.

> > - the second patch is just a quick example on how to generate ROI info.

> >

> > The encoders such as libx264 support different QPs offset for

> > different MBs, it makes possible for ROI-based encoding. It makes

> > sense to add support within ffmpeg to generate/accept ROI infos and pass

> into encoders.

> >

> > Typical usage: After AVFrame is decoded, a ffmpeg filter or user's

> > code generates ROI info for that frame, and the encoder finally does

> > the ROI-based encoding. And so I choose to maintain the ROI info

> > (AVFrameROI) within AVFrame struct.

> >

> > Since the ROI info generator might more focus on the domain knowledge

> > of the interest regions, instead of the encoding detail, the

> > AVFrameROI is designed to be more friend for ffmpeg users.

> >

> > This patch just enabled the path from ffmpeg to libx264, the more

> > encoders can be added later.

> >

> > Signed-off-by: Guo, Yejun <yejun.guo@intel.com>

> > ---

> >  libavcodec/libx264.c | 56

> ++++++++++++++++++++++++++++++++++++++++++++++++++++

> >  libavutil/frame.c    |  8 ++++++++

> >  libavutil/frame.h    | 24 ++++++++++++++++++++++

> >  3 files changed, 88 insertions(+)

> >

> > ...

> > diff --git a/libavutil/frame.c b/libavutil/frame.c index

> > 9b3fb13..dbc4b0a 100644

> > --- a/libavutil/frame.c

> > +++ b/libavutil/frame.c

> > @@ -425,6 +425,13 @@ FF_DISABLE_DEPRECATION_WARNINGS

> > FF_ENABLE_DEPRECATION_WARNINGS  #endif

> >

> > +    av_buffer_unref(&dst->rois_buf);

> > +    if (src->rois_buf) {

> > +        dst->rois_buf = av_buffer_ref(src->rois_buf);

> > +        if (!dst->rois_buf)

> > +            return AVERROR(ENOMEM);

> > +    }

> > +

> >      av_buffer_unref(&dst->opaque_ref);

> >      av_buffer_unref(&dst->private_ref);

> >      if (src->opaque_ref) {

> > @@ -571,6 +578,7 @@ FF_DISABLE_DEPRECATION_WARNINGS

> > FF_ENABLE_DEPRECATION_WARNINGS  #endif

> >

> > +    av_buffer_unref(&frame->rois_buf);

> >      av_buffer_unref(&frame->hw_frames_ctx);

> >

> >      av_buffer_unref(&frame->opaque_ref);

> > diff --git a/libavutil/frame.h b/libavutil/frame.h index

> > 66f27f4..00d509d 100644

> > --- a/libavutil/frame.h

> > +++ b/libavutil/frame.h

> > @@ -193,6 +193,23 @@ typedef struct AVFrameSideData {

> >      AVBufferRef *buf;

> >  } AVFrameSideData;

> >

> > +enum AVRoiQuality {

> > +    AV_RQ_NONE = 0,

> > +    AV_RQ_BETTER = 1,

> > +    AV_RQ_BEST = 2,

> > +};

> 

> I don't think I like this enum - an integer value without directly specifying this

> meaning would seem best for future flexibility?  Negative values might be

> meaningful.  A greater range would also allow ranking if there are many

> regions, which may be needed if some are discarded (because of hardware

> constraints, for example - VAAPI makes this visible).


thanks, yes, the enum method also has disadvantages. 
I'll change the interface to export an integer value.

> 

> > +

> > +typedef struct AVFrameROI {

> > +    /* coordinates at frame pixel level.

> > +     * it will be extended internally if the codec requirs an alignment

> > +     */

> 

> What is intended to happen if regions overlap?  (In the above you have it

> using the last value in the list.)


Yes, the last value will be used if regions overlap.  
My idea is that the ROI generator is responsible to keep the order. I will add notes here.

> 

> > +    size_t top;

> > +    size_t bottom;

> > +    size_t left;

> > +    size_t right;

> > +    enum AVRoiQuality quality;

> > +} AVFrameROI;

> > +

> >  /**

> >   * This structure describes decoded (raw) audio or video data.

> >   *

> > @@ -556,6 +573,13 @@ typedef struct AVFrame {

> >      attribute_deprecated

> >      AVBufferRef *qp_table_buf;

> >  #endif

> > +

> > +    /**

> > +     * For ROI-based encoding, the number of ROI area is implied

> > +     * in the size of buf.

> > +     */

> > +    AVBufferRef *rois_buf;

> 

> This should be a new type of AVFrameSideData, not a new field in AVFrame.


thanks, will change to it.

> 

> > +

> >      /**

> >       * For hwaccel-format frames, this should be a reference to the

> >       * AVHWFramesContext describing the frame.

> >

> 

> - Mark

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Guo, Yejun Dec. 24, 2018, 8:41 a.m. UTC | #6
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Derek Buitenhuis

> Sent: Saturday, December 22, 2018 12:36 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH 1/2] add support for ROI-based

> encoding

> 

> A few comments below.

> 

> On 12/12/2018 16:26, Guo, Yejun wrote:

> > +        if (frame->rois_buf != NULL) {

> > +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {

> > +                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization must

> > + be enabled to use ROI encoding, skipping ROI.\n");

> 

> This should, in my opinion, return an error and fail hard.

> 

> If people want it to continue anyway, it should be AV_LOG_WARNING.


thanks, WARNING is better, will change to it.

> 

> > +            } else {

> > +                if (frame->interlaced_frame == 0) {

> > +                    const static int MBSIZE = 16;

> 

> I think we generally use defines for this stuff.


will change to define.

> 

> > +                    size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;

> > +                    size_t mby = (frame->height + MBSIZE - 1) /

> > + MBSIZE;

> 

> 

> > +                    float* qoffsets = (float*)av_malloc(sizeof(float)

> > + * mbx * mby);

> 

> Convention in FFmpeg is to use sizeof(*var).


ok, I'll follow it.

> 

> > +                    memset(qoffsets, 0, sizeof(float) * mbx * mby);

> 

> Missing NULL check for alloc failure.


thanks, will add the check.

> 

> > +

> > +                    size_t nb_rois = frame->rois_buf->size /

> > + sizeof(AVFrameROI);

> 

> I think we have some macros that do this already.


I tried a code search and do not find one, there is a similar macro which does not work for this case: #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
I might miss it, I'll try again to see if any luck.

> 

> > +                    AVFrameROI* rois = (AVFrameROI*)frame->rois_buf->data;

> > +                    for (size_t roi = 0; roi < nb_rois; ++roi) {

> 

> Nit/convention: roi++


ok, will change.

> 

> > +                        int starty = FFMIN(mby, rois[roi].top / MBSIZE);

> > +                        int endy = FFMIN(mby, (rois[roi].bottom + MBSIZE - 1)/

> MBSIZE);

> > +                        int startx = FFMIN(mbx, rois[roi].left / MBSIZE);

> > +                        int endx = FFMIN(mbx, (rois[roi].right + MBSIZE - 1)/ MBSIZE);

> > +                        for (int y = starty; y < endy; ++y) {

> > +                            for (int x = startx; x < endx; ++x) {


and will also change to x++ and y ++

> > +                                qoffsets[x + y*mbx] = get_roi_qoffset(ctx,

> rois[roi].quality);

> > +                            }

> > +                        }

> > +                    }

> > +

> > +                    x4->pic.prop.quant_offsets = qoffsets;

> > +                    x4->pic.prop.quant_offsets_free = av_free;

> > +                } else {

> > +                    av_log(ctx, AV_LOG_ERROR, "interlaced_frame not

> > + supported for ROI encoding yet, skipping ROI.\n");

> 

> Same comment as befor: return error or change to warning.


will change to warning.

> 

> 

> 

> > +enum AVRoiQuality {

> 

> Probably should be AVROIQuality.

> 

> > +    AV_RQ_NONE = 0,

> > +    AV_RQ_BETTER = 1,

> > +    AV_RQ_BEST = 2,

> > +};

> > +

> > +typedef struct AVFrameROI {

> > +    /* coordinates at frame pixel level.

> > +     * it will be extended internally if the codec requirs an alignment

> > +     */

> > +    size_t top;

> > +    size_t bottom;

> > +    size_t left;

> > +    size_t right;

> > +    enum AVRoiQuality quality;

> > +} AVFrameROI;

> 

> Are we not going to allow the API to set an actual offset? This really limits

> what someone can do. (I say this as a user of x264's ROI API, in my own

> codebase, at least.)


thanks, the new idea is to use an integer value instead of the enum.

> 

> Cheers,

> - Derek

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Derek Buitenhuis Dec. 24, 2018, 4:46 p.m. UTC | #7
On 24/12/2018 08:41, Guo, Yejun wrote:
>>> +                    size_t nb_rois = frame->rois_buf->size /
>>> + sizeof(AVFrameROI);
>>
>> I think we have some macros that do this already.
> 
> I tried a code search and do not find one, there is a similar macro which does not work for this case: #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
> I might miss it, I'll try again to see if any luck.

Right, I was thinking of FF_ARRAY_ELEMS, and you're right. You can disregard this
comment from me.

> thanks, the new idea is to use an integer value instead of the enum.

OK. I'll take a look when teh new patch lands.

Thanks! ROI support is something avcodec has needed for a while.

Cheers,
- Derek
diff mbox

Patch

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a68d0a7..e4e593f 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -273,6 +273,29 @@  static void reconfig_encoder(AVCodecContext *ctx, const AVFrame *frame)
     }
 }
 
+static float get_roi_qoffset(AVCodecContext *ctx, enum AVRoiQuality q)
+{
+    // the returned value can be refined with more consideration.
+    float qoffset = 0.0f;
+    switch (q)
+    {
+    case AV_RQ_NONE:
+        qoffset = 0.0f;
+        break;
+    case AV_RQ_BETTER:
+        qoffset = -8.0f;
+        break;
+    case AV_RQ_BEST:
+        qoffset = -16.0f;
+        break;
+    default:
+        av_log(ctx, AV_LOG_ERROR, "unknown value of AVRoiQuality.\n");
+        break;
+    }
+
+    return qoffset;
+}
+
 static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                       int *got_packet)
 {
@@ -345,6 +368,39 @@  static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                 }
             }
         }
+
+        if (frame->rois_buf != NULL) {
+            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
+                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+            } else {
+                if (frame->interlaced_frame == 0) {
+                    const static int MBSIZE = 16;
+                    size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;
+                    size_t mby = (frame->height + MBSIZE - 1) / MBSIZE;
+                    float* qoffsets = (float*)av_malloc(sizeof(float) * mbx * mby);
+                    memset(qoffsets, 0, sizeof(float) * mbx * mby);
+
+                    size_t nb_rois = frame->rois_buf->size / sizeof(AVFrameROI);
+                    AVFrameROI* rois = (AVFrameROI*)frame->rois_buf->data;
+                    for (size_t roi = 0; roi < nb_rois; ++roi) {
+                        int starty = FFMIN(mby, rois[roi].top / MBSIZE);
+                        int endy = FFMIN(mby, (rois[roi].bottom + MBSIZE - 1)/ MBSIZE);
+                        int startx = FFMIN(mbx, rois[roi].left / MBSIZE);
+                        int endx = FFMIN(mbx, (rois[roi].right + MBSIZE - 1)/ MBSIZE);
+                        for (int y = starty; y < endy; ++y) {
+                            for (int x = startx; x < endx; ++x) {
+                                qoffsets[x + y*mbx] = get_roi_qoffset(ctx, rois[roi].quality);
+                            }
+                        }
+                    }
+
+                    x4->pic.prop.quant_offsets = qoffsets;
+                    x4->pic.prop.quant_offsets_free = av_free;
+                } else {
+                    av_log(ctx, AV_LOG_ERROR, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
+                }
+            }
+        }
     }
 
     do {
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 9b3fb13..dbc4b0a 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -425,6 +425,13 @@  FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+    av_buffer_unref(&dst->rois_buf);
+    if (src->rois_buf) {
+        dst->rois_buf = av_buffer_ref(src->rois_buf);
+        if (!dst->rois_buf)
+            return AVERROR(ENOMEM);
+    }
+
     av_buffer_unref(&dst->opaque_ref);
     av_buffer_unref(&dst->private_ref);
     if (src->opaque_ref) {
@@ -571,6 +578,7 @@  FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
+    av_buffer_unref(&frame->rois_buf);
     av_buffer_unref(&frame->hw_frames_ctx);
 
     av_buffer_unref(&frame->opaque_ref);
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 66f27f4..00d509d 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -193,6 +193,23 @@  typedef struct AVFrameSideData {
     AVBufferRef *buf;
 } AVFrameSideData;
 
+enum AVRoiQuality {
+    AV_RQ_NONE = 0,
+    AV_RQ_BETTER = 1,
+    AV_RQ_BEST = 2,
+};
+
+typedef struct AVFrameROI {
+    /* coordinates at frame pixel level.
+     * it will be extended internally if the codec requirs an alignment
+     */
+    size_t top;
+    size_t bottom;
+    size_t left;
+    size_t right;
+    enum AVRoiQuality quality;
+} AVFrameROI;
+
 /**
  * This structure describes decoded (raw) audio or video data.
  *
@@ -556,6 +573,13 @@  typedef struct AVFrame {
     attribute_deprecated
     AVBufferRef *qp_table_buf;
 #endif
+
+    /**
+     * For ROI-based encoding, the number of ROI area is implied
+     * in the size of buf.
+     */
+    AVBufferRef *rois_buf;
+
     /**
      * For hwaccel-format frames, this should be a reference to the
      * AVHWFramesContext describing the frame.