Message ID | 1545768695-31176-1-git-send-email-yejun.guo@intel.com |
---|---|
State | New |
Headers | show |
On Wed, Dec 26, 2018 at 04:11:35AM +0800, Guo, Yejun wrote: > This patchset contains two patches. > - the first patch (this patch) finished the code and ask for upstream. > - the second patch is just a quick example on how to generate ROI info. > > The encoders such as libx264 support different QPs offset for different MBs, > it makes possible for ROI-based encoding. It makes sense to add support > within ffmpeg to generate/accept ROI infos and pass into encoders. > > Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code > generates ROI info for that frame, and the encoder finally does the > ROI-based encoding. > > This patch just enabled the path from ffmpeg to libx264, the more encoders > can be added later. > > Signed-off-by: Guo, Yejun <yejun.guo@intel.com> > --- > libavcodec/libx264.c | 40 ++++++++++++++++++++++++++++++++++++++++ > libavutil/frame.c | 1 + > libavutil/frame.h | 19 +++++++++++++++++++ > 3 files changed, 60 insertions(+) The commit message talks about 2 patches but this commit is one patch > > diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c > index a68d0a7..a4f8677 100644 > --- a/libavcodec/libx264.c > +++ b/libavcodec/libx264.c > @@ -40,6 +40,10 @@ > #include <stdlib.h> > #include <string.h> > > +// from x264.h, for quant_offsets, Macroblocks are 16x16 > +// blocks of pixels (with respect to the luma plane) > +#define MB_SIZE 16 > + > typedef struct X264Context { > AVClass *class; > x264_param_t params; > @@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame, > } > } > } > + > + AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ROIS); > + if (sd != NULL) { > + if (x4->params.rc.i_aq_mode == X264_AQ_NONE) { > + av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n"); > + } else { > + if (frame->interlaced_frame == 0) { > + size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE; > + size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE; > + float* qoffsets; > + qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby); > + if (qoffsets == NULL) > + return AVERROR(ENOMEM); > + memset(qoffsets, 0, sizeof(*qoffsets) * mbx * mby); av_mallocz_array() > + > + size_t nb_rois = sd->size / sizeof(AVROI); > + AVROI* rois = (AVROI*)sd->data; > + for (size_t roi = 0; roi < nb_rois; roi++) { > + int starty = FFMIN(mby, rois[roi].top / MB_SIZE); > + int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ MB_SIZE); > + int startx = FFMIN(mbx, rois[roi].left / MB_SIZE); > + int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ MB_SIZE); > + for (int y = starty; y < endy; y++) { > + for (int x = startx; x < endx; x++) { > + qoffsets[x + y*mbx] = rois[roi].qoffset; > + } > + } > + } > + > + x4->pic.prop.quant_offsets = qoffsets; > + x4->pic.prop.quant_offsets_free = av_free; > + } else { > + av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n"); > + } > + } > + } > } > > do { > diff --git a/libavutil/frame.c b/libavutil/frame.c > index 34a6210..bebc50e 100644 > --- a/libavutil/frame.c > +++ b/libavutil/frame.c > @@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) > case AV_FRAME_DATA_QP_TABLE_DATA: return "QP table data"; > #endif > case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)"; > + case AV_FRAME_DATA_ROIS: return "Regions Of Interest"; > } > return NULL; > } > diff --git a/libavutil/frame.h b/libavutil/frame.h > index 582ac47..d18d235 100644 > --- a/libavutil/frame.h > +++ b/libavutil/frame.h > @@ -173,6 +173,12 @@ enum AVFrameSideDataType { > * volume transform - application 4 of SMPTE 2094-40:2016 standard. > */ > AV_FRAME_DATA_DYNAMIC_HDR_PLUS, > + > + /** > + * Regions Of Interest, the number of ROI area is implied > + * in the size of buf. > + */ > + AV_FRAME_DATA_ROIS, > }; the API addition to libavutil should be in a seperate patch from the use of the API, it also must bump the version The text describing AV_FRAME_DATA_ROIS also is inadequate to use it it doesnt really describe what this side data looks like exactly [...]
> -----Original Message----- > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf > Of Michael Niedermayer > Sent: Thursday, December 27, 2018 12:44 AM > To: FFmpeg development discussions and patches <ffmpeg- > devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH V2 1/2] add support for ROI-based > encoding > > On Wed, Dec 26, 2018 at 04:11:35AM +0800, Guo, Yejun wrote: > > This patchset contains two patches. > > - the first patch (this patch) finished the code and ask for upstream. > > - the second patch is just a quick example on how to generate ROI info. > > > > The encoders such as libx264 support different QPs offset for > > different MBs, it makes possible for ROI-based encoding. It makes > > sense to add support within ffmpeg to generate/accept ROI infos and pass > into encoders. > > > > Typical usage: After AVFrame is decoded, a ffmpeg filter or user's > > code generates ROI info for that frame, and the encoder finally does > > the ROI-based encoding. > > > > This patch just enabled the path from ffmpeg to libx264, the more > > encoders can be added later. > > > > Signed-off-by: Guo, Yejun <yejun.guo@intel.com> > > --- > > libavcodec/libx264.c | 40 > ++++++++++++++++++++++++++++++++++++++++ > > libavutil/frame.c | 1 + > > libavutil/frame.h | 19 +++++++++++++++++++ > > 3 files changed, 60 insertions(+) > > The commit message talks about 2 patches but this commit is one patch > This patch is: [FFmpeg-devel] [PATCH V2 1/2] add support for ROI-based encoding, see http://ffmpeg.org/pipermail/ffmpeg-devel/2018-December/238135.html, and the other patch is: [FFmpeg-devel] [PATCH V2 2/2] add an example to show how to fill the ROI info , see http://ffmpeg.org/pipermail/ffmpeg-devel/2018-December/238136.html. I sent out the two patches almost at the same time. Thanks for pointing out the possible confusion, to make it clear, I will remove the relative descriptions in this patch, and keep the relative descriptions in the last patch (the patch that just for a reference, not for upstream). > > > > > diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index > > a68d0a7..a4f8677 100644 > > --- a/libavcodec/libx264.c > > +++ b/libavcodec/libx264.c > > @@ -40,6 +40,10 @@ > > #include <stdlib.h> > > #include <string.h> > > > > +// from x264.h, for quant_offsets, Macroblocks are 16x16 // blocks of > > +pixels (with respect to the luma plane) #define MB_SIZE 16 > > + > > typedef struct X264Context { > > AVClass *class; > > x264_param_t params; > > @@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx, > AVPacket *pkt, const AVFrame *frame, > > } > > } > > } > > + > > + AVFrameSideData *sd = av_frame_get_side_data(frame, > AV_FRAME_DATA_ROIS); > > + if (sd != NULL) { > > + if (x4->params.rc.i_aq_mode == X264_AQ_NONE) { > > + av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be > enabled to use ROI encoding, skipping ROI.\n"); > > + } else { > > + if (frame->interlaced_frame == 0) { > > + size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE; > > + size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE; > > + float* qoffsets; > > + qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby); > > + if (qoffsets == NULL) > > + return AVERROR(ENOMEM); > > + memset(qoffsets, 0, sizeof(*qoffsets) * mbx * > > + mby); > > av_mallocz_array() > thanks, will change to it. > > > > + > > + size_t nb_rois = sd->size / sizeof(AVROI); > > + AVROI* rois = (AVROI*)sd->data; > > + for (size_t roi = 0; roi < nb_rois; roi++) { > > + int starty = FFMIN(mby, rois[roi].top / MB_SIZE); > > + int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ > MB_SIZE); > > + int startx = FFMIN(mbx, rois[roi].left / MB_SIZE); > > + int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ > MB_SIZE); > > + for (int y = starty; y < endy; y++) { > > + for (int x = startx; x < endx; x++) { > > + qoffsets[x + y*mbx] = rois[roi].qoffset; > > + } > > + } > > + } > > + > > + x4->pic.prop.quant_offsets = qoffsets; > > + x4->pic.prop.quant_offsets_free = av_free; > > + } else { > > + av_log(ctx, AV_LOG_WARNING, "interlaced_frame not > supported for ROI encoding yet, skipping ROI.\n"); > > + } > > + } > > + } > > } > > > > do { > > diff --git a/libavutil/frame.c b/libavutil/frame.c index > > 34a6210..bebc50e 100644 > > --- a/libavutil/frame.c > > +++ b/libavutil/frame.c > > @@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum > AVFrameSideDataType type) > > case AV_FRAME_DATA_QP_TABLE_DATA: return "QP table data"; > > #endif > > case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic > Metadata > > SMPTE2094-40 (HDR10+)"; > > + case AV_FRAME_DATA_ROIS: return "Regions Of Interest"; > > } > > return NULL; > > } > > diff --git a/libavutil/frame.h b/libavutil/frame.h index > > 582ac47..d18d235 100644 > > --- a/libavutil/frame.h > > +++ b/libavutil/frame.h > > @@ -173,6 +173,12 @@ enum AVFrameSideDataType { > > * volume transform - application 4 of SMPTE 2094-40:2016 standard. > > */ > > AV_FRAME_DATA_DYNAMIC_HDR_PLUS, > > + > > + /** > > + * Regions Of Interest, the number of ROI area is implied > > + * in the size of buf. > > + */ > > + AV_FRAME_DATA_ROIS, > > }; > > the API addition to libavutil should be in a seperate patch from the use of the > API, it also must bump the version thanks, will separate the patch and bump the libavutil version. > > The text describing AV_FRAME_DATA_ROIS also is inadequate to use it it > doesnt really describe what this side data looks like exactly thanks, will fix it. > > [...] > -- > Michael GnuPG fingerprint: > 9FF2128B147EF6730BADF133611EC787040B0FAB > > If you fake or manipulate statistics in a paper in physics you will never get a > job again. > If you fake or manipulate statistics in a paper in medicin you will get a job for > life at the pharma industry.
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index a68d0a7..a4f8677 100644 --- a/libavcodec/libx264.c +++ b/libavcodec/libx264.c @@ -40,6 +40,10 @@ #include <stdlib.h> #include <string.h> +// from x264.h, for quant_offsets, Macroblocks are 16x16 +// blocks of pixels (with respect to the luma plane) +#define MB_SIZE 16 + typedef struct X264Context { AVClass *class; x264_param_t params; @@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame, } } } + + AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ROIS); + if (sd != NULL) { + if (x4->params.rc.i_aq_mode == X264_AQ_NONE) { + av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n"); + } else { + if (frame->interlaced_frame == 0) { + size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE; + size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE; + float* qoffsets; + qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby); + if (qoffsets == NULL) + return AVERROR(ENOMEM); + memset(qoffsets, 0, sizeof(*qoffsets) * mbx * mby); + + size_t nb_rois = sd->size / sizeof(AVROI); + AVROI* rois = (AVROI*)sd->data; + for (size_t roi = 0; roi < nb_rois; roi++) { + int starty = FFMIN(mby, rois[roi].top / MB_SIZE); + int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ MB_SIZE); + int startx = FFMIN(mbx, rois[roi].left / MB_SIZE); + int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ MB_SIZE); + for (int y = starty; y < endy; y++) { + for (int x = startx; x < endx; x++) { + qoffsets[x + y*mbx] = rois[roi].qoffset; + } + } + } + + x4->pic.prop.quant_offsets = qoffsets; + x4->pic.prop.quant_offsets_free = av_free; + } else { + av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n"); + } + } + } } do { diff --git a/libavutil/frame.c b/libavutil/frame.c index 34a6210..bebc50e 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) case AV_FRAME_DATA_QP_TABLE_DATA: return "QP table data"; #endif case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)"; + case AV_FRAME_DATA_ROIS: return "Regions Of Interest"; } return NULL; } diff --git a/libavutil/frame.h b/libavutil/frame.h index 582ac47..d18d235 100644 --- a/libavutil/frame.h +++ b/libavutil/frame.h @@ -173,6 +173,12 @@ enum AVFrameSideDataType { * volume transform - application 4 of SMPTE 2094-40:2016 standard. */ AV_FRAME_DATA_DYNAMIC_HDR_PLUS, + + /** + * Regions Of Interest, the number of ROI area is implied + * in the size of buf. + */ + AV_FRAME_DATA_ROIS, }; enum AVActiveFormatDescription { @@ -200,6 +206,19 @@ typedef struct AVFrameSideData { AVBufferRef *buf; } AVFrameSideData; +typedef struct AVROI { + /* coordinates at frame pixel level. + * It will be extended internally if the codec requires an alignment. + * If the regions overlap, the last value in the list will be used. + */ + size_t top; + size_t bottom; + size_t left; + size_t right; + // quant offset is encoder dependent + int qoffset; +} AVROI; + /** * This structure describes decoded (raw) audio or video data. *
This patchset contains two patches. - the first patch (this patch) finished the code and ask for upstream. - the second patch is just a quick example on how to generate ROI info. The encoders such as libx264 support different QPs offset for different MBs, it makes possible for ROI-based encoding. It makes sense to add support within ffmpeg to generate/accept ROI infos and pass into encoders. Typical usage: After AVFrame is decoded, a ffmpeg filter or user's code generates ROI info for that frame, and the encoder finally does the ROI-based encoding. This patch just enabled the path from ffmpeg to libx264, the more encoders can be added later. Signed-off-by: Guo, Yejun <yejun.guo@intel.com> --- libavcodec/libx264.c | 40 ++++++++++++++++++++++++++++++++++++++++ libavutil/frame.c | 1 + libavutil/frame.h | 19 +++++++++++++++++++ 3 files changed, 60 insertions(+)