diff mbox

[FFmpeg-devel,2/5] libx264: Update ROI behaviour to match documentation

Message ID 20190227220023.16040-2-sw@jkqxz.net
State New
Headers show

Commit Message

Mark Thompson Feb. 27, 2019, 10 p.m. UTC
Fix the quantisation offset - use the whole range, and don't change the
offset size based on bit depth.

Use correct bottom/right edge locations (they are offsets from bottom/right,
not from top/left).

Iterate the list in reverse order.  The regions are in order of decreasing
importance, so the most important must be applied last.
---
 libavcodec/libx264.c | 50 ++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

Comments

Guo, Yejun Feb. 28, 2019, 2:53 a.m. UTC | #1
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Mark Thompson

> Sent: Thursday, February 28, 2019 6:00 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to

> match documentation

> 

> Fix the quantisation offset - use the whole range, and don't change the

> offset size based on bit depth.

> 

> Use correct bottom/right edge locations (they are offsets from bottom/right,

> not from top/left).

> 

> Iterate the list in reverse order.  The regions are in order of decreasing

> importance, so the most important must be applied last.

> ---

>  libavcodec/libx264.c | 50 ++++++++++++++++++++++++--------------------

>  1 file changed, 27 insertions(+), 23 deletions(-)

> 

> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c

> index a3493f393d..475719021e 100644

> --- a/libavcodec/libx264.c

> +++ b/libavcodec/libx264.c

> @@ -285,16 +285,18 @@ static int X264_frame(AVCodecContext *ctx,

> AVPacket *pkt, const AVFrame *frame,

>      int nnal, i, ret;

>      x264_picture_t pic_out = {0};

>      int pict_type;

> +    int bit_depth;

>      int64_t *out_opaque;

>      AVFrameSideData *sd;

> 

>      x264_picture_init( &x4->pic );

>      x4->pic.img.i_csp   = x4->params.i_csp;

>  #if X264_BUILD >= 153

> -    if (x4->params.i_bitdepth > 8)

> +    bit_depth = x4->params.i_bitdepth;

>  #else

> -    if (x264_bit_depth > 8)

> +    bit_depth = x264_bit_depth;

>  #endif

> +    if (bit_depth > 8)

>          x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;

>      x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);

> 

> @@ -359,45 +361,47 @@ static int X264_frame(AVCodecContext *ctx,

> AVPacket *pkt, const AVFrame *frame,

>                  if (frame->interlaced_frame == 0) {

>                      int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;

>                      int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;

> +                    int qp_range = 51 + 6 * (bit_depth - 8);


just found following from "$ ./x264 --fullhelp", not sure what 81 means here. Shall we change our qoffset formula accordingly?
      --qpmin <integer>       Set min QP [0]
      --qpmax <integer>       Set max QP [81]

>                      int nb_rois;

> -                    AVRegionOfInterest* roi;

> -                    float* qoffsets;

> +                    const AVRegionOfInterest *roi;

> +                    float *qoffsets;

>                      qoffsets = av_mallocz_array(mbx * mby, sizeof(*qoffsets));

>                      if (!qoffsets)

>                          return AVERROR(ENOMEM);

> 

> -                    nb_rois = sd->size / sizeof(AVRegionOfInterest);

> -                    roi = (AVRegionOfInterest*)sd->data;

> -                    for (int count = 0; count < nb_rois; count++) {

> -                        int starty = FFMIN(mby, roi->top / MB_SIZE);

> -                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE - 1)/ MB_SIZE);

> -                        int startx = FFMIN(mbx, roi->left / MB_SIZE);

> -                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE - 1)/ MB_SIZE);

> +                    roi = (const AVRegionOfInterest*)sd->data;

> +                    if (!roi->self_size || sd->size % roi->self_size != 0) {

> +                        av_log(ctx, AV_LOG_ERROR, "Invalid

> AVRegionOfInterest.self_size.\n");

> +                        return AVERROR(EINVAL);

> +                    }

> +                    nb_rois = sd->size / roi->self_size;

> +

> +                    // This list must be iterated in reverse because regions are

> +                    // defined in order of decreasing importance.


Nit:   the reason may be more straight forward.
This list must be iterated in reverse because: when overlapping regions are defined, the first region containing a given area of the frame applies.

> +                    for (int i = nb_rois - 1; i >= 0; i--) {

> +                        int startx, endx, starty, endy;

>                          float qoffset;

> 

> +                        roi = (const AVRegionOfInterest*)(sd->data + roi->self_size * i);

> +

> +                        starty = av_clip(roi->top / MB_SIZE, 0, mby);

> +                        endy   = av_clip((frame->height - roi->bottom + MB_SIZE - 1) /

> MB_SIZE, 0, mby);

> +                        startx = av_clip(roi->left / MB_SIZE, 0, mbx);

> +                        endx   = av_clip((frame->width - roi->right + MB_SIZE - 1) /

> MB_SIZE, 0, mbx);


not quite understand why endx/endy is calculated so.
For example, for a 1920x1080 frame, and roi->top is 0, and roi->bottom is 1080, then,
starty is 0, endy is 0, which make the following loop does not work.

                          for (int y = starty; y < endy; y++) {
                              for (int x = startx; x < endx; x++) {
                                  qoffsets[x + y*mbx] = qoffset;
                              }
                          }

> +

>                          if (roi->qoffset.den == 0) {

>                              av_free(qoffsets);

> -                            av_log(ctx, AV_LOG_ERROR,

> "AVRegionOfInterest.qoffset.den should not be zero.\n");

> +                            av_log(ctx, AV_LOG_ERROR,

> "AVRegionOfInterest.qoffset.den must not be zero.\n");

>                              return AVERROR(EINVAL);

>                          }

>                          qoffset = roi->qoffset.num * 1.0f / roi->qoffset.den;

> -                        qoffset = av_clipf(qoffset, -1.0f, 1.0f);

> -

> -                        // 25 is a number that I think it is a possible proper scale value.

> -                        qoffset = qoffset * 25;

> +                        qoffset = av_clipf(qoffset * qp_range, -qp_range, +qp_range);

> 

>                          for (int y = starty; y < endy; y++) {

>                              for (int x = startx; x < endx; x++) {

>                                  qoffsets[x + y*mbx] = qoffset;

>                              }

>                          }

> -

> -                        if (roi->self_size == 0) {

> -                            av_free(qoffsets);

> -                            av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.self_size

> should be set to sizeof(AVRegionOfInterest).\n");

> -                            return AVERROR(EINVAL);

> -                        }

> -                        roi = (AVRegionOfInterest*)((char*)roi + roi->self_size);

>                      }

> 

>                      x4->pic.prop.quant_offsets = qoffsets;

> --

> 2.19.2

> 

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Jun Zhao Feb. 28, 2019, 3:25 a.m. UTC | #2
On Thu, Feb 28, 2019 at 10:53 AM Guo, Yejun <yejun.guo@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf
> > Of Mark Thompson
> > Sent: Thursday, February 28, 2019 6:00 AM
> > To: ffmpeg-devel@ffmpeg.org
> > Subject: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to
> > match documentation
> >
> > Fix the quantisation offset - use the whole range, and don't change the
> > offset size based on bit depth.
> >
> > Use correct bottom/right edge locations (they are offsets from
bottom/right,
> > not from top/left).
> >
> > Iterate the list in reverse order.  The regions are in order of
decreasing
> > importance, so the most important must be applied last.
> > ---
> >  libavcodec/libx264.c | 50 ++++++++++++++++++++++++--------------------
> >  1 file changed, 27 insertions(+), 23 deletions(-)
> >
> > diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
> > index a3493f393d..475719021e 100644
> > --- a/libavcodec/libx264.c
> > +++ b/libavcodec/libx264.c
> > @@ -285,16 +285,18 @@ static int X264_frame(AVCodecContext *ctx,
> > AVPacket *pkt, const AVFrame *frame,
> >      int nnal, i, ret;
> >      x264_picture_t pic_out = {0};
> >      int pict_type;
> > +    int bit_depth;
> >      int64_t *out_opaque;
> >      AVFrameSideData *sd;
> >
> >      x264_picture_init( &x4->pic );
> >      x4->pic.img.i_csp   = x4->params.i_csp;
> >  #if X264_BUILD >= 153
> > -    if (x4->params.i_bitdepth > 8)
> > +    bit_depth = x4->params.i_bitdepth;
> >  #else
> > -    if (x264_bit_depth > 8)
> > +    bit_depth = x264_bit_depth;
> >  #endif
> > +    if (bit_depth > 8)
> >          x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;
> >      x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);
> >
> > @@ -359,45 +361,47 @@ static int X264_frame(AVCodecContext *ctx,
> > AVPacket *pkt, const AVFrame *frame,
> >                  if (frame->interlaced_frame == 0) {
> >                      int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
> >                      int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
> > +                    int qp_range = 51 + 6 * (bit_depth - 8);
>
> just found following from "$ ./x264 --fullhelp", not sure what 81 means
here. Shall we change our qoffset formula accordingly?
>       --qpmin <integer>       Set min QP [0]
>       --qpmax <integer>       Set max QP [81]
>
> >                      int nb_rois;
> > -                    AVRegionOfInterest* roi;
> > -                    float* qoffsets;
> > +                    const AVRegionOfInterest *roi;
> > +                    float *qoffsets;
> >                      qoffsets = av_mallocz_array(mbx * mby,
sizeof(*qoffsets));
> >                      if (!qoffsets)
> >                          return AVERROR(ENOMEM);
> >
> > -                    nb_rois = sd->size / sizeof(AVRegionOfInterest);
> > -                    roi = (AVRegionOfInterest*)sd->data;
> > -                    for (int count = 0; count < nb_rois; count++) {
> > -                        int starty = FFMIN(mby, roi->top / MB_SIZE);
> > -                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE
- 1)/ MB_SIZE);
> > -                        int startx = FFMIN(mbx, roi->left / MB_SIZE);
> > -                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE
- 1)/ MB_SIZE);
> > +                    roi = (const AVRegionOfInterest*)sd->data;
> > +                    if (!roi->self_size || sd->size % roi->self_size
!= 0) {
> > +                        av_log(ctx, AV_LOG_ERROR, "Invalid
> > AVRegionOfInterest.self_size.\n");
> > +                        return AVERROR(EINVAL);
> > +                    }
> > +                    nb_rois = sd->size / roi->self_size;
> > +
> > +                    // This list must be iterated in reverse because
regions are
> > +                    // defined in order of decreasing importance.
>
> Nit:   the reason may be more straight forward.
> This list must be iterated in reverse because: when overlapping regions
are defined, the first region containing a given area of the frame applies.
>
> > +                    for (int i = nb_rois - 1; i >= 0; i--) {
> > +                        int startx, endx, starty, endy;
> >                          float qoffset;
> >
> > +                        roi = (const AVRegionOfInterest*)(sd->data +
roi->self_size * i);
> > +
> > +                        starty = av_clip(roi->top / MB_SIZE, 0, mby);
> > +                        endy   = av_clip((frame->height - roi->bottom
+ MB_SIZE - 1) /
> > MB_SIZE, 0, mby);
> > +                        startx = av_clip(roi->left / MB_SIZE, 0, mbx);
> > +                        endx   = av_clip((frame->width - roi->right +
MB_SIZE - 1) /
> > MB_SIZE, 0, mbx);
>
> not quite understand why endx/endy is calculated so.
> For example, for a 1920x1080 frame, and roi->top is 0, and roi->bottom is
1080, then,
> starty is 0, endy is 0, which make the following loop does not work.

I think Mark use the (left/top) and (right/bottom) as the offset, like this:
in the fig, (left/top) == (s1x, s1y), (right/bottom) ==(s2x,s2y)


+-----------------------+------> w (x)
|          ^            |
|          | s1y        |
|          V            |
|      +***********+    |
| s1x  *           * s2x|
|<-->  *  ROI      *<-->|
|      *           *    |
|      +***********+    |
|          ^            |
|          | s2y        |
|          V            |
|-----------------------+
|
V

h (y)

>
>                           for (int y = starty; y < endy; y++) {
>                               for (int x = startx; x < endx; x++) {
>                                   qoffsets[x + y*mbx] = qoffset;
>                               }
>                           }
>
> > +
> >                          if (roi->qoffset.den == 0) {
> >                              av_free(qoffsets);
> > -                            av_log(ctx, AV_LOG_ERROR,
> > "AVRegionOfInterest.qoffset.den should not be zero.\n");
> > +                            av_log(ctx, AV_LOG_ERROR,
> > "AVRegionOfInterest.qoffset.den must not be zero.\n");
> >                              return AVERROR(EINVAL);
> >                          }
> >                          qoffset = roi->qoffset.num * 1.0f /
roi->qoffset.den;
> > -                        qoffset = av_clipf(qoffset, -1.0f, 1.0f);
> > -
> > -                        // 25 is a number that I think it is a
possible proper scale value.
> > -                        qoffset = qoffset * 25;
> > +                        qoffset = av_clipf(qoffset * qp_range,
-qp_range, +qp_range);
> >
> >                          for (int y = starty; y < endy; y++) {
> >                              for (int x = startx; x < endx; x++) {
> >                                  qoffsets[x + y*mbx] = qoffset;
> >                              }
> >                          }
> > -
> > -                        if (roi->self_size == 0) {
> > -                            av_free(qoffsets);
> > -                            av_log(ctx, AV_LOG_ERROR,
"AVRegionOfInterest.self_size
> > should be set to sizeof(AVRegionOfInterest).\n");
> > -                            return AVERROR(EINVAL);
> > -                        }
> > -                        roi = (AVRegionOfInterest*)((char*)roi +
roi->self_size);
> >                      }
> >
> >                      x4->pic.prop.quant_offsets = qoffsets;
> > --
> > 2.19.2
> >
Guo, Yejun Feb. 28, 2019, 6:38 a.m. UTC | #3
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of mypopy@gmail.com

> Sent: Thursday, February 28, 2019 11:26 AM

> To: FFmpeg development discussions and patches <ffmpeg-

> devel@ffmpeg.org>

> Subject: Re: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to

> match documentation

> 

> On Thu, Feb 28, 2019 at 10:53 AM Guo, Yejun <yejun.guo@intel.com> wrote:

> >

> >

> >

> > > -----Original Message-----

> > > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On

> Behalf

> > > Of Mark Thompson

> > > Sent: Thursday, February 28, 2019 6:00 AM

> > > To: ffmpeg-devel@ffmpeg.org

> > > Subject: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to

> > > match documentation

> > >

> > > Fix the quantisation offset - use the whole range, and don't change the

> > > offset size based on bit depth.

> > >

> > > Use correct bottom/right edge locations (they are offsets from

> bottom/right,

> > > not from top/left).

> > >

> > > Iterate the list in reverse order.  The regions are in order of

> decreasing

> > > importance, so the most important must be applied last.

> > > ---

> > >  libavcodec/libx264.c | 50 ++++++++++++++++++++++++--------------------

> > >  1 file changed, 27 insertions(+), 23 deletions(-)

> > >

> > > diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c

> > > index a3493f393d..475719021e 100644

> > > --- a/libavcodec/libx264.c

> > > +++ b/libavcodec/libx264.c

> > > @@ -285,16 +285,18 @@ static int X264_frame(AVCodecContext *ctx,

> > > AVPacket *pkt, const AVFrame *frame,

> > >      int nnal, i, ret;

> > >      x264_picture_t pic_out = {0};

> > >      int pict_type;

> > > +    int bit_depth;

> > >      int64_t *out_opaque;

> > >      AVFrameSideData *sd;

> > >

> > >      x264_picture_init( &x4->pic );

> > >      x4->pic.img.i_csp   = x4->params.i_csp;

> > >  #if X264_BUILD >= 153

> > > -    if (x4->params.i_bitdepth > 8)

> > > +    bit_depth = x4->params.i_bitdepth;

> > >  #else

> > > -    if (x264_bit_depth > 8)

> > > +    bit_depth = x264_bit_depth;

> > >  #endif

> > > +    if (bit_depth > 8)

> > >          x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;

> > >      x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);

> > >

> > > @@ -359,45 +361,47 @@ static int X264_frame(AVCodecContext *ctx,

> > > AVPacket *pkt, const AVFrame *frame,

> > >                  if (frame->interlaced_frame == 0) {

> > >                      int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;

> > >                      int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;

> > > +                    int qp_range = 51 + 6 * (bit_depth - 8);

> >

> > just found following from "$ ./x264 --fullhelp", not sure what 81 means

> here. Shall we change our qoffset formula accordingly?

> >       --qpmin <integer>       Set min QP [0]

> >       --qpmax <integer>       Set max QP [81]

> >

> > >                      int nb_rois;

> > > -                    AVRegionOfInterest* roi;

> > > -                    float* qoffsets;

> > > +                    const AVRegionOfInterest *roi;

> > > +                    float *qoffsets;

> > >                      qoffsets = av_mallocz_array(mbx * mby,

> sizeof(*qoffsets));

> > >                      if (!qoffsets)

> > >                          return AVERROR(ENOMEM);

> > >

> > > -                    nb_rois = sd->size / sizeof(AVRegionOfInterest);

> > > -                    roi = (AVRegionOfInterest*)sd->data;

> > > -                    for (int count = 0; count < nb_rois; count++) {

> > > -                        int starty = FFMIN(mby, roi->top / MB_SIZE);

> > > -                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE

> - 1)/ MB_SIZE);

> > > -                        int startx = FFMIN(mbx, roi->left / MB_SIZE);

> > > -                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE

> - 1)/ MB_SIZE);

> > > +                    roi = (const AVRegionOfInterest*)sd->data;

> > > +                    if (!roi->self_size || sd->size % roi->self_size

> != 0) {

> > > +                        av_log(ctx, AV_LOG_ERROR, "Invalid

> > > AVRegionOfInterest.self_size.\n");

> > > +                        return AVERROR(EINVAL);

> > > +                    }

> > > +                    nb_rois = sd->size / roi->self_size;

> > > +

> > > +                    // This list must be iterated in reverse because

> regions are

> > > +                    // defined in order of decreasing importance.

> >

> > Nit:   the reason may be more straight forward.

> > This list must be iterated in reverse because: when overlapping regions

> are defined, the first region containing a given area of the frame applies.

> >

> > > +                    for (int i = nb_rois - 1; i >= 0; i--) {

> > > +                        int startx, endx, starty, endy;

> > >                          float qoffset;

> > >

> > > +                        roi = (const AVRegionOfInterest*)(sd->data +

> roi->self_size * i);

> > > +

> > > +                        starty = av_clip(roi->top / MB_SIZE, 0, mby);

> > > +                        endy   = av_clip((frame->height - roi->bottom

> + MB_SIZE - 1) /

> > > MB_SIZE, 0, mby);

> > > +                        startx = av_clip(roi->left / MB_SIZE, 0, mbx);

> > > +                        endx   = av_clip((frame->width - roi->right +

> MB_SIZE - 1) /

> > > MB_SIZE, 0, mbx);

> >

> > not quite understand why endx/endy is calculated so.

> > For example, for a 1920x1080 frame, and roi->top is 0, and roi->bottom is

> 1080, then,

> > starty is 0, endy is 0, which make the following loop does not work.

> 

> I think Mark use the (left/top) and (right/bottom) as the offset, like this:

> in the fig, (left/top) == (s1x, s1y), (right/bottom) ==(s2x,s2y)

> 

> 

> +-----------------------+------> w (x)

> |          ^            |

> |          | s1y        |

> |          V            |

> |      +***********+    |

> | s1x  *           * s2x|

> |<-->  *  ROI      *<-->|

> |      *           *    |

> |      +***********+    |

> |          ^            |

> |          | s2y        |

> |          V            |

> |-----------------------+

> |

> V

> 

> h (y)

> 


thanks, I guess so.  But, I'm not quite understand why use this style. 
And also correct the following loops if we finally choose this style.

> >

> >                           for (int y = starty; y < endy; y++) {

> >                               for (int x = startx; x < endx; x++) {

> >                                   qoffsets[x + y*mbx] = qoffset;

> >                               }

> >                           }

> >

> > > +

> > >                          if (roi->qoffset.den == 0) {

> > >                              av_free(qoffsets);

> > > -                            av_log(ctx, AV_LOG_ERROR,

> > > "AVRegionOfInterest.qoffset.den should not be zero.\n");

> > > +                            av_log(ctx, AV_LOG_ERROR,

> > > "AVRegionOfInterest.qoffset.den must not be zero.\n");

> > >                              return AVERROR(EINVAL);

> > >                          }

> > >                          qoffset = roi->qoffset.num * 1.0f /

> roi->qoffset.den;

> > > -                        qoffset = av_clipf(qoffset, -1.0f, 1.0f);

> > > -

> > > -                        // 25 is a number that I think it is a

> possible proper scale value.

> > > -                        qoffset = qoffset * 25;

> > > +                        qoffset = av_clipf(qoffset * qp_range,

> -qp_range, +qp_range);

> > >

> > >                          for (int y = starty; y < endy; y++) {

> > >                              for (int x = startx; x < endx; x++) {

> > >                                  qoffsets[x + y*mbx] = qoffset;

> > >                              }

> > >                          }

> > > -

> > > -                        if (roi->self_size == 0) {

> > > -                            av_free(qoffsets);

> > > -                            av_log(ctx, AV_LOG_ERROR,

> "AVRegionOfInterest.self_size

> > > should be set to sizeof(AVRegionOfInterest).\n");

> > > -                            return AVERROR(EINVAL);

> > > -                        }

> > > -                        roi = (AVRegionOfInterest*)((char*)roi +

> roi->self_size);

> > >                      }

> > >

> > >                      x4->pic.prop.quant_offsets = qoffsets;

> > > --

> > > 2.19.2

> > >

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Mark Thompson March 5, 2019, 12:52 a.m. UTC | #4
On 28/02/2019 06:38, Guo, Yejun wrote:
>> -----Original Message-----
>> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf
>> Of mypopy@gmail.com
>> Sent: Thursday, February 28, 2019 11:26 AM
>> To: FFmpeg development discussions and patches <ffmpeg-
>> devel@ffmpeg.org>
>> Subject: Re: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to
>> match documentation
>>
>> On Thu, Feb 28, 2019 at 10:53 AM Guo, Yejun <yejun.guo@intel.com> wrote:
>>>> -----Original Message-----
>>>> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On
>> Behalf
>>>> Of Mark Thompson
>>>> Sent: Thursday, February 28, 2019 6:00 AM
>>>> To: ffmpeg-devel@ffmpeg.org
>>>> Subject: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to
>>>> match documentation
>>>>
>>>> Fix the quantisation offset - use the whole range, and don't change the
>>>> offset size based on bit depth.
>>>>
>>>> Use correct bottom/right edge locations (they are offsets from
>> bottom/right,
>>>> not from top/left).
>>>>
>>>> Iterate the list in reverse order.  The regions are in order of
>> decreasing
>>>> importance, so the most important must be applied last.
>>>> ---
>>>>  libavcodec/libx264.c | 50 ++++++++++++++++++++++++--------------------
>>>>  1 file changed, 27 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
>>>> index a3493f393d..475719021e 100644
>>>> --- a/libavcodec/libx264.c
>>>> +++ b/libavcodec/libx264.c
>>>> @@ -285,16 +285,18 @@ static int X264_frame(AVCodecContext *ctx,
>>>> AVPacket *pkt, const AVFrame *frame,
>>>>      int nnal, i, ret;
>>>>      x264_picture_t pic_out = {0};
>>>>      int pict_type;
>>>> +    int bit_depth;
>>>>      int64_t *out_opaque;
>>>>      AVFrameSideData *sd;
>>>>
>>>>      x264_picture_init( &x4->pic );
>>>>      x4->pic.img.i_csp   = x4->params.i_csp;
>>>>  #if X264_BUILD >= 153
>>>> -    if (x4->params.i_bitdepth > 8)
>>>> +    bit_depth = x4->params.i_bitdepth;
>>>>  #else
>>>> -    if (x264_bit_depth > 8)
>>>> +    bit_depth = x264_bit_depth;
>>>>  #endif
>>>> +    if (bit_depth > 8)
>>>>          x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;
>>>>      x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);
>>>>
>>>> @@ -359,45 +361,47 @@ static int X264_frame(AVCodecContext *ctx,
>>>> AVPacket *pkt, const AVFrame *frame,
>>>>                  if (frame->interlaced_frame == 0) {
>>>>                      int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
>>>>                      int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
>>>> +                    int qp_range = 51 + 6 * (bit_depth - 8);
>>>
>>> just found following from "$ ./x264 --fullhelp", not sure what 81 means
>> here. Shall we change our qoffset formula accordingly?
>>>       --qpmin <integer>       Set min QP [0]
>>>       --qpmax <integer>       Set max QP [81]

libx264 applies a fixed offset to the QP range to make it nonnegative (by adding QpBdOffsetY).  The maximum of 81 therefore corresponds to a bit depth of (81-51)/6+8 = 13 bits, the higher values only being valid at the higher depths.

I guess that's set for a higher depth than libx264 actually supports to avoid any future problems with range (e.g. 12-bit support would not be an unreasonable feature).

>>>>                      int nb_rois;
>>>> -                    AVRegionOfInterest* roi;
>>>> -                    float* qoffsets;
>>>> +                    const AVRegionOfInterest *roi;
>>>> +                    float *qoffsets;
>>>>                      qoffsets = av_mallocz_array(mbx * mby,
>> sizeof(*qoffsets));
>>>>                      if (!qoffsets)
>>>>                          return AVERROR(ENOMEM);
>>>>
>>>> -                    nb_rois = sd->size / sizeof(AVRegionOfInterest);
>>>> -                    roi = (AVRegionOfInterest*)sd->data;
>>>> -                    for (int count = 0; count < nb_rois; count++) {
>>>> -                        int starty = FFMIN(mby, roi->top / MB_SIZE);
>>>> -                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE
>> - 1)/ MB_SIZE);
>>>> -                        int startx = FFMIN(mbx, roi->left / MB_SIZE);
>>>> -                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE
>> - 1)/ MB_SIZE);
>>>> +                    roi = (const AVRegionOfInterest*)sd->data;
>>>> +                    if (!roi->self_size || sd->size % roi->self_size
>> != 0) {
>>>> +                        av_log(ctx, AV_LOG_ERROR, "Invalid
>>>> AVRegionOfInterest.self_size.\n");
>>>> +                        return AVERROR(EINVAL);
>>>> +                    }
>>>> +                    nb_rois = sd->size / roi->self_size;
>>>> +
>>>> +                    // This list must be iterated in reverse because
>> regions are
>>>> +                    // defined in order of decreasing importance.
>>>
>>> Nit:   the reason may be more straight forward.
>>> This list must be iterated in reverse because: when overlapping regions
>> are defined, the first region containing a given area of the frame applies.

Right, yes.  I've updated the comment appropriately.

>>>> +                    for (int i = nb_rois - 1; i >= 0; i--) {
>>>> +                        int startx, endx, starty, endy;
>>>>                          float qoffset;
>>>>
>>>> +                        roi = (const AVRegionOfInterest*)(sd->data +
>> roi->self_size * i);
>>>> +
>>>> +                        starty = av_clip(roi->top / MB_SIZE, 0, mby);
>>>> +                        endy   = av_clip((frame->height - roi->bottom
>> + MB_SIZE - 1) /
>>>> MB_SIZE, 0, mby);
>>>> +                        startx = av_clip(roi->left / MB_SIZE, 0, mbx);
>>>> +                        endx   = av_clip((frame->width - roi->right +
>> MB_SIZE - 1) /
>>>> MB_SIZE, 0, mbx);
>>>
>>> not quite understand why endx/endy is calculated so.
>>> For example, for a 1920x1080 frame, and roi->top is 0, and roi->bottom is
>> 1080, then,
>>> starty is 0, endy is 0, which make the following loop does not work.
>>
>> I think Mark use the (left/top) and (right/bottom) as the offset, like this:
>> in the fig, (left/top) == (s1x, s1y), (right/bottom) ==(s2x,s2y)
>>
>>
>> +-----------------------+------> w (x)
>> |          ^            |
>> |          | s1y        |
>> |          V            |
>> |      +***********+    |
>> | s1x  *           * s2x|
>> |<-->  *  ROI      *<-->|
>> |      *           *    |
>> |      +***********+    |
>> |          ^            |
>> |          | s2y        |
>> |          V            |
>> |-----------------------+
>> |
>> V
>>
>> h (y)
>>
> 
> thanks, I guess so.  But, I'm not quite understand why use this style. 

This definition is what is in the current documentation: <http://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavutil/frame.h;h=8aa3e88367ad3605033dbfe92a431d97e0834023;hb=HEAD#l215>.  I followed that when writing the VAAPI support, and only afterwards realised that it didn't match what libx264 was doing.

I wouldn't mind a different way to define the rectangle if everyone agrees.  This method matches cropping in AVFrame and various codecs like H.264.  The current libx26[45] code instead defines the coordinates of the top left and bottom right of the region.  A third alternative would be the coordinates of the top left combined with a width and height, which would match some other filters as suggested by Moritz in the addroi thread (that was primarily talking about the filter option interface, but I don't think it's unreasonable to want the API to match).

Thanks

- Mark
Guo, Yejun March 5, 2019, 12:46 p.m. UTC | #5
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Mark Thompson

> Sent: Tuesday, March 05, 2019 8:52 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to

> match documentation

> 

> On 28/02/2019 06:38, Guo, Yejun wrote:

> >> -----Original Message-----

> >> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On

> Behalf

> >> Of mypopy@gmail.com

> >> Sent: Thursday, February 28, 2019 11:26 AM

> >> To: FFmpeg development discussions and patches <ffmpeg-

> >> devel@ffmpeg.org>

> >> Subject: Re: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour

> to

> >> match documentation

> >>

> >> On Thu, Feb 28, 2019 at 10:53 AM Guo, Yejun <yejun.guo@intel.com>

> wrote:

> >>>> -----Original Message-----

> >>>> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On

> >> Behalf

> >>>> Of Mark Thompson

> >>>> Sent: Thursday, February 28, 2019 6:00 AM

> >>>> To: ffmpeg-devel@ffmpeg.org

> >>>> Subject: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to

> >>>> match documentation

> >>>>

> >>>> Fix the quantisation offset - use the whole range, and don't change the

> >>>> offset size based on bit depth.

> >>>>

> >>>> Use correct bottom/right edge locations (they are offsets from

> >> bottom/right,

> >>>> not from top/left).

> >>>>

> >>>> Iterate the list in reverse order.  The regions are in order of

> >> decreasing

> >>>> importance, so the most important must be applied last.

> >>>> ---

> >>>>  libavcodec/libx264.c | 50 ++++++++++++++++++++++++-----------------

> ---

> >>>>  1 file changed, 27 insertions(+), 23 deletions(-)

> >>>>

> >>>> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c

> >>>> index a3493f393d..475719021e 100644

> >>>> --- a/libavcodec/libx264.c

> >>>> +++ b/libavcodec/libx264.c

> >>>> @@ -285,16 +285,18 @@ static int X264_frame(AVCodecContext *ctx,

> >>>> AVPacket *pkt, const AVFrame *frame,

> >>>>      int nnal, i, ret;

> >>>>      x264_picture_t pic_out = {0};

> >>>>      int pict_type;

> >>>> +    int bit_depth;

> >>>>      int64_t *out_opaque;

> >>>>      AVFrameSideData *sd;

> >>>>

> >>>>      x264_picture_init( &x4->pic );

> >>>>      x4->pic.img.i_csp   = x4->params.i_csp;

> >>>>  #if X264_BUILD >= 153

> >>>> -    if (x4->params.i_bitdepth > 8)

> >>>> +    bit_depth = x4->params.i_bitdepth;

> >>>>  #else

> >>>> -    if (x264_bit_depth > 8)

> >>>> +    bit_depth = x264_bit_depth;

> >>>>  #endif

> >>>> +    if (bit_depth > 8)

> >>>>          x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;

> >>>>      x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);

> >>>>

> >>>> @@ -359,45 +361,47 @@ static int X264_frame(AVCodecContext *ctx,

> >>>> AVPacket *pkt, const AVFrame *frame,

> >>>>                  if (frame->interlaced_frame == 0) {

> >>>>                      int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;

> >>>>                      int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;

> >>>> +                    int qp_range = 51 + 6 * (bit_depth - 8);

> >>>

> >>> just found following from "$ ./x264 --fullhelp", not sure what 81 means

> >> here. Shall we change our qoffset formula accordingly?

> >>>       --qpmin <integer>       Set min QP [0]

> >>>       --qpmax <integer>       Set max QP [81]

> 

> libx264 applies a fixed offset to the QP range to make it nonnegative (by

> adding QpBdOffsetY).  The maximum of 81 therefore corresponds to a bit

> depth of (81-51)/6+8 = 13 bits, the higher values only being valid at the higher

> depths.

> 

> I guess that's set for a higher depth than libx264 actually supports to avoid

> any future problems with range (e.g. 12-bit support would not be an

> unreasonable feature).

> 


got it, thanks.

> >>>>                      int nb_rois;

> >>>> -                    AVRegionOfInterest* roi;

> >>>> -                    float* qoffsets;

> >>>> +                    const AVRegionOfInterest *roi;

> >>>> +                    float *qoffsets;

> >>>>                      qoffsets = av_mallocz_array(mbx * mby,

> >> sizeof(*qoffsets));

> >>>>                      if (!qoffsets)

> >>>>                          return AVERROR(ENOMEM);

> >>>>

> >>>> -                    nb_rois = sd->size / sizeof(AVRegionOfInterest);

> >>>> -                    roi = (AVRegionOfInterest*)sd->data;

> >>>> -                    for (int count = 0; count < nb_rois; count++) {

> >>>> -                        int starty = FFMIN(mby, roi->top / MB_SIZE);

> >>>> -                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE

> >> - 1)/ MB_SIZE);

> >>>> -                        int startx = FFMIN(mbx, roi->left / MB_SIZE);

> >>>> -                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE

> >> - 1)/ MB_SIZE);

> >>>> +                    roi = (const AVRegionOfInterest*)sd->data;

> >>>> +                    if (!roi->self_size || sd->size % roi->self_size

> >> != 0) {

> >>>> +                        av_log(ctx, AV_LOG_ERROR, "Invalid

> >>>> AVRegionOfInterest.self_size.\n");

> >>>> +                        return AVERROR(EINVAL);

> >>>> +                    }

> >>>> +                    nb_rois = sd->size / roi->self_size;

> >>>> +

> >>>> +                    // This list must be iterated in reverse because

> >> regions are

> >>>> +                    // defined in order of decreasing importance.

> >>>

> >>> Nit:   the reason may be more straight forward.

> >>> This list must be iterated in reverse because: when overlapping regions

> >> are defined, the first region containing a given area of the frame applies.

> 

> Right, yes.  I've updated the comment appropriately.

> 

> >>>> +                    for (int i = nb_rois - 1; i >= 0; i--) {

> >>>> +                        int startx, endx, starty, endy;

> >>>>                          float qoffset;

> >>>>

> >>>> +                        roi = (const AVRegionOfInterest*)(sd->data +

> >> roi->self_size * i);

> >>>> +

> >>>> +                        starty = av_clip(roi->top / MB_SIZE, 0, mby);

> >>>> +                        endy   = av_clip((frame->height - roi->bottom

> >> + MB_SIZE - 1) /

> >>>> MB_SIZE, 0, mby);

> >>>> +                        startx = av_clip(roi->left / MB_SIZE, 0, mbx);

> >>>> +                        endx   = av_clip((frame->width - roi->right +

> >> MB_SIZE - 1) /

> >>>> MB_SIZE, 0, mbx);

> >>>

> >>> not quite understand why endx/endy is calculated so.

> >>> For example, for a 1920x1080 frame, and roi->top is 0, and roi->bottom is

> >> 1080, then,

> >>> starty is 0, endy is 0, which make the following loop does not work.

> >>

> >> I think Mark use the (left/top) and (right/bottom) as the offset, like this:

> >> in the fig, (left/top) == (s1x, s1y), (right/bottom) ==(s2x,s2y)

> >>

> >>

> >> +-----------------------+------> w (x)

> >> |          ^            |

> >> |          | s1y        |

> >> |          V            |

> >> |      +***********+    |

> >> | s1x  *           * s2x|

> >> |<-->  *  ROI      *<-->|

> >> |      *           *    |

> >> |      +***********+    |

> >> |          ^            |

> >> |          | s2y        |

> >> |          V            |

> >> |-----------------------+

> >> |

> >> V

> >>

> >> h (y)

> >>

> >

> > thanks, I guess so.  But, I'm not quite understand why use this style.

> 

> This definition is what is in the current documentation:

> <http://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavutil/frame.h;h=8aa3e8

> 8367ad3605033dbfe92a431d97e0834023;hb=HEAD#l215>.  I followed that

> when writing the VAAPI support, and only afterwards realised that it didn't

> match what libx264 was doing.


my fault, I just copied the comment, I min-understood it. I would prefer to 
correct the documentation typo here.

> 

> I wouldn't mind a different way to define the rectangle if everyone agrees.

> This method matches cropping in AVFrame and various codecs like H.264.

> The current libx26[45] code instead defines the coordinates of the top left

> and bottom right of the region.  A third alternative would be the coordinates

> of the top left combined with a width and height, which would match some

> other filters as suggested by Moritz in the addroi thread (that was primarily

> talking about the filter option interface, but I don't think it's unreasonable to

> want the API to match).


I'm also ok to switch to top/left/width/height.

> 

> Thanks

> 

> - Mark

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Guo, Yejun March 11, 2019, 3:24 a.m. UTC | #6
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Guo, Yejun

> Sent: Tuesday, March 05, 2019 8:46 PM

> To: FFmpeg development discussions and patches <ffmpeg-

> devel@ffmpeg.org>

> Subject: Re: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to

> match documentation

> 

> 

> 

> > -----Original Message-----

> > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On

> Behalf

> > Of Mark Thompson

> > Sent: Tuesday, March 05, 2019 8:52 AM

> > To: ffmpeg-devel@ffmpeg.org

> > Subject: Re: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour to

> > match documentation

> >

> > On 28/02/2019 06:38, Guo, Yejun wrote:

> > >> -----Original Message-----

> > >> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On

> > Behalf

> > >> Of mypopy@gmail.com

> > >> Sent: Thursday, February 28, 2019 11:26 AM

> > >> To: FFmpeg development discussions and patches <ffmpeg-

> > >> devel@ffmpeg.org>

> > >> Subject: Re: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour

> > to

> > >> match documentation

> > >>

> > >> On Thu, Feb 28, 2019 at 10:53 AM Guo, Yejun <yejun.guo@intel.com>

> > wrote:

> > >>>> -----Original Message-----

> > >>>> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On

> > >> Behalf

> > >>>> Of Mark Thompson

> > >>>> Sent: Thursday, February 28, 2019 6:00 AM

> > >>>> To: ffmpeg-devel@ffmpeg.org

> > >>>> Subject: [FFmpeg-devel] [PATCH 2/5] libx264: Update ROI behaviour

> to

> > >>>> match documentation

> > >>>>

> > >>>> Fix the quantisation offset - use the whole range, and don't change

> the

> > >>>> offset size based on bit depth.

> > >>>>

> > >>>> Use correct bottom/right edge locations (they are offsets from

> > >> bottom/right,

> > >>>> not from top/left).

> > >>>>

> > >>>> Iterate the list in reverse order.  The regions are in order of

> > >> decreasing

> > >>>> importance, so the most important must be applied last.

> > >>>> ---

> > >>>>  libavcodec/libx264.c | 50 ++++++++++++++++++++++++---------------

> --

> > ---

> > >>>>  1 file changed, 27 insertions(+), 23 deletions(-)

> > >>>>

> > >>>> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c

> > >>>> index a3493f393d..475719021e 100644

> > >>>> --- a/libavcodec/libx264.c

> > >>>> +++ b/libavcodec/libx264.c

> > >>>> @@ -285,16 +285,18 @@ static int X264_frame(AVCodecContext *ctx,

> > >>>> AVPacket *pkt, const AVFrame *frame,

> > >>>>      int nnal, i, ret;

> > >>>>      x264_picture_t pic_out = {0};

> > >>>>      int pict_type;

> > >>>> +    int bit_depth;

> > >>>>      int64_t *out_opaque;

> > >>>>      AVFrameSideData *sd;

> > >>>>

> > >>>>      x264_picture_init( &x4->pic );

> > >>>>      x4->pic.img.i_csp   = x4->params.i_csp;

> > >>>>  #if X264_BUILD >= 153

> > >>>> -    if (x4->params.i_bitdepth > 8)

> > >>>> +    bit_depth = x4->params.i_bitdepth;

> > >>>>  #else

> > >>>> -    if (x264_bit_depth > 8)

> > >>>> +    bit_depth = x264_bit_depth;

> > >>>>  #endif

> > >>>> +    if (bit_depth > 8)

> > >>>>          x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;

> > >>>>      x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);

> > >>>>

> > >>>> @@ -359,45 +361,47 @@ static int X264_frame(AVCodecContext *ctx,

> > >>>> AVPacket *pkt, const AVFrame *frame,

> > >>>>                  if (frame->interlaced_frame == 0) {

> > >>>>                      int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;

> > >>>>                      int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;

> > >>>> +                    int qp_range = 51 + 6 * (bit_depth - 8);

> > >>>

> > >>> just found following from "$ ./x264 --fullhelp", not sure what 81 means

> > >> here. Shall we change our qoffset formula accordingly?

> > >>>       --qpmin <integer>       Set min QP [0]

> > >>>       --qpmax <integer>       Set max QP [81]

> >

> > libx264 applies a fixed offset to the QP range to make it nonnegative (by

> > adding QpBdOffsetY).  The maximum of 81 therefore corresponds to a bit

> > depth of (81-51)/6+8 = 13 bits, the higher values only being valid at the

> higher

> > depths.

> >

> > I guess that's set for a higher depth than libx264 actually supports to avoid

> > any future problems with range (e.g. 12-bit support would not be an

> > unreasonable feature).

> >

> 

> got it, thanks.

> 

> > >>>>                      int nb_rois;

> > >>>> -                    AVRegionOfInterest* roi;

> > >>>> -                    float* qoffsets;

> > >>>> +                    const AVRegionOfInterest *roi;

> > >>>> +                    float *qoffsets;

> > >>>>                      qoffsets = av_mallocz_array(mbx * mby,

> > >> sizeof(*qoffsets));

> > >>>>                      if (!qoffsets)

> > >>>>                          return AVERROR(ENOMEM);

> > >>>>

> > >>>> -                    nb_rois = sd->size / sizeof(AVRegionOfInterest);

> > >>>> -                    roi = (AVRegionOfInterest*)sd->data;

> > >>>> -                    for (int count = 0; count < nb_rois; count++) {

> > >>>> -                        int starty = FFMIN(mby, roi->top / MB_SIZE);

> > >>>> -                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE

> > >> - 1)/ MB_SIZE);

> > >>>> -                        int startx = FFMIN(mbx, roi->left / MB_SIZE);

> > >>>> -                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE

> > >> - 1)/ MB_SIZE);

> > >>>> +                    roi = (const AVRegionOfInterest*)sd->data;

> > >>>> +                    if (!roi->self_size || sd->size % roi->self_size

> > >> != 0) {

> > >>>> +                        av_log(ctx, AV_LOG_ERROR, "Invalid

> > >>>> AVRegionOfInterest.self_size.\n");

> > >>>> +                        return AVERROR(EINVAL);

> > >>>> +                    }

> > >>>> +                    nb_rois = sd->size / roi->self_size;

> > >>>> +

> > >>>> +                    // This list must be iterated in reverse because

> > >> regions are

> > >>>> +                    // defined in order of decreasing importance.

> > >>>

> > >>> Nit:   the reason may be more straight forward.

> > >>> This list must be iterated in reverse because: when overlapping regions

> > >> are defined, the first region containing a given area of the frame applies.

> >

> > Right, yes.  I've updated the comment appropriately.

> >

> > >>>> +                    for (int i = nb_rois - 1; i >= 0; i--) {

> > >>>> +                        int startx, endx, starty, endy;

> > >>>>                          float qoffset;

> > >>>>

> > >>>> +                        roi = (const AVRegionOfInterest*)(sd->data +

> > >> roi->self_size * i);


another comment raised after discussing with Jun Zhao.

Since we don't choose to check every roi->self_size, we need to do
'self_size = roi->self_size' at the beginning (where roi is sd->data[0]), and this line as:
roi = (const AVRegionOfInterest*)(sd->data + self_size * i);


> > >>>> +

> > >>>> +                        starty = av_clip(roi->top / MB_SIZE, 0, mby);

> > >>>> +                        endy   = av_clip((frame->height - roi->bottom

> > >> + MB_SIZE - 1) /

> > >>>> MB_SIZE, 0, mby);

> > >>>> +                        startx = av_clip(roi->left / MB_SIZE, 0, mbx);

> > >>>> +                        endx   = av_clip((frame->width - roi->right +

> > >> MB_SIZE - 1) /

> > >>>> MB_SIZE, 0, mbx);

> > >>>

> > >>> not quite understand why endx/endy is calculated so.

> > >>> For example, for a 1920x1080 frame, and roi->top is 0, and roi->bottom

> is

> > >> 1080, then,

> > >>> starty is 0, endy is 0, which make the following loop does not work.

> > >>

> > >> I think Mark use the (left/top) and (right/bottom) as the offset, like this:

> > >> in the fig, (left/top) == (s1x, s1y), (right/bottom) ==(s2x,s2y)

> > >>

> > >>

> > >> +-----------------------+------> w (x)

> > >> |          ^            |

> > >> |          | s1y        |

> > >> |          V            |

> > >> |      +***********+    |

> > >> | s1x  *           * s2x|

> > >> |<-->  *  ROI      *<-->|

> > >> |      *           *    |

> > >> |      +***********+    |

> > >> |          ^            |

> > >> |          | s2y        |

> > >> |          V            |

> > >> |-----------------------+

> > >> |

> > >> V

> > >>

> > >> h (y)

> > >>

> > >

> > > thanks, I guess so.  But, I'm not quite understand why use this style.

> >

> > This definition is what is in the current documentation:

> >

> <http://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavutil/frame.h;h=8aa3e8

> > 8367ad3605033dbfe92a431d97e0834023;hb=HEAD#l215>.  I followed that

> > when writing the VAAPI support, and only afterwards realised that it didn't

> > match what libx264 was doing.

> 

> my fault, I just copied the comment, I min-understood it. I would prefer to

> correct the documentation typo here.

> 

> >

> > I wouldn't mind a different way to define the rectangle if everyone agrees.

> > This method matches cropping in AVFrame and various codecs like H.264.

> > The current libx26[45] code instead defines the coordinates of the top left

> > and bottom right of the region.  A third alternative would be the

> coordinates

> > of the top left combined with a width and height, which would match some

> > other filters as suggested by Moritz in the addroi thread (that was primarily

> > talking about the filter option interface, but I don't think it's unreasonable

> to

> > want the API to match).

> 

> I'm also ok to switch to top/left/width/height.

> 

> >

> > Thanks

> >

> > - Mark

> > _______________________________________________

> > ffmpeg-devel mailing list

> > ffmpeg-devel@ffmpeg.org

> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a3493f393d..475719021e 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -285,16 +285,18 @@  static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
     int nnal, i, ret;
     x264_picture_t pic_out = {0};
     int pict_type;
+    int bit_depth;
     int64_t *out_opaque;
     AVFrameSideData *sd;
 
     x264_picture_init( &x4->pic );
     x4->pic.img.i_csp   = x4->params.i_csp;
 #if X264_BUILD >= 153
-    if (x4->params.i_bitdepth > 8)
+    bit_depth = x4->params.i_bitdepth;
 #else
-    if (x264_bit_depth > 8)
+    bit_depth = x264_bit_depth;
 #endif
+    if (bit_depth > 8)
         x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;
     x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);
 
@@ -359,45 +361,47 @@  static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                 if (frame->interlaced_frame == 0) {
                     int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
                     int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
+                    int qp_range = 51 + 6 * (bit_depth - 8);
                     int nb_rois;
-                    AVRegionOfInterest* roi;
-                    float* qoffsets;
+                    const AVRegionOfInterest *roi;
+                    float *qoffsets;
                     qoffsets = av_mallocz_array(mbx * mby, sizeof(*qoffsets));
                     if (!qoffsets)
                         return AVERROR(ENOMEM);
 
-                    nb_rois = sd->size / sizeof(AVRegionOfInterest);
-                    roi = (AVRegionOfInterest*)sd->data;
-                    for (int count = 0; count < nb_rois; count++) {
-                        int starty = FFMIN(mby, roi->top / MB_SIZE);
-                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE - 1)/ MB_SIZE);
-                        int startx = FFMIN(mbx, roi->left / MB_SIZE);
-                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE - 1)/ MB_SIZE);
+                    roi = (const AVRegionOfInterest*)sd->data;
+                    if (!roi->self_size || sd->size % roi->self_size != 0) {
+                        av_log(ctx, AV_LOG_ERROR, "Invalid AVRegionOfInterest.self_size.\n");
+                        return AVERROR(EINVAL);
+                    }
+                    nb_rois = sd->size / roi->self_size;
+
+                    // This list must be iterated in reverse because regions are
+                    // defined in order of decreasing importance.
+                    for (int i = nb_rois - 1; i >= 0; i--) {
+                        int startx, endx, starty, endy;
                         float qoffset;
 
+                        roi = (const AVRegionOfInterest*)(sd->data + roi->self_size * i);
+
+                        starty = av_clip(roi->top / MB_SIZE, 0, mby);
+                        endy   = av_clip((frame->height - roi->bottom + MB_SIZE - 1) / MB_SIZE, 0, mby);
+                        startx = av_clip(roi->left / MB_SIZE, 0, mbx);
+                        endx   = av_clip((frame->width - roi->right + MB_SIZE - 1) / MB_SIZE, 0, mbx);
+
                         if (roi->qoffset.den == 0) {
                             av_free(qoffsets);
-                            av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den should not be zero.\n");
+                            av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must not be zero.\n");
                             return AVERROR(EINVAL);
                         }
                         qoffset = roi->qoffset.num * 1.0f / roi->qoffset.den;
-                        qoffset = av_clipf(qoffset, -1.0f, 1.0f);
-
-                        // 25 is a number that I think it is a possible proper scale value.
-                        qoffset = qoffset * 25;
+                        qoffset = av_clipf(qoffset * qp_range, -qp_range, +qp_range);
 
                         for (int y = starty; y < endy; y++) {
                             for (int x = startx; x < endx; x++) {
                                 qoffsets[x + y*mbx] = qoffset;
                             }
                         }
-
-                        if (roi->self_size == 0) {
-                            av_free(qoffsets);
-                            av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.self_size should be set to sizeof(AVRegionOfInterest).\n");
-                            return AVERROR(EINVAL);
-                        }
-                        roi = (AVRegionOfInterest*)((char*)roi + roi->self_size);
                     }
 
                     x4->pic.prop.quant_offsets = qoffsets;