Message ID | 20210326080931.2952-4-yejun.guo@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel,V6,1/6] lavfi/dnn_backend_openvino.c: only allow DFT_PROCESS_FRAME to get output dim | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
Quoting Guo, Yejun (2021-03-26 09:09:29) > Signed-off-by: Guo, Yejun <yejun.guo@intel.com> > --- > doc/APIchanges | 2 ++ > libavutil/Makefile | 1 + > libavutil/boundingbox.h | 79 +++++++++++++++++++++++++++++++++++++++++ > libavutil/frame.c | 1 + > libavutil/frame.h | 7 ++++ > 5 files changed, 90 insertions(+) > create mode 100644 libavutil/boundingbox.h > > diff --git a/doc/APIchanges b/doc/APIchanges > index b41dadee8d..160b020d9d 100644 > --- a/doc/APIchanges > +++ b/doc/APIchanges > @@ -14,6 +14,8 @@ libavutil: 2017-10-21 > > > API changes, most recent first: > +2021-03-xx - xxxxxxxxxx - lavu 56.xx.100 - frame.h boundingbox.h > + Add AV_FRAME_DATA_BOUNDING_BOXES > > 2021-03-21 - xxxxxxxxxx - lavu 56.72.100 - frame.h > Deprecated av_get_colorspace_name(). > diff --git a/libavutil/Makefile b/libavutil/Makefile > index 27bafe9e12..7e1dc713ac 100644 > --- a/libavutil/Makefile > +++ b/libavutil/Makefile > @@ -11,6 +11,7 @@ HEADERS = adler32.h \ > avutil.h \ > base64.h \ > blowfish.h \ > + boundingbox.h \ > bprint.h \ > bswap.h \ > buffer.h \ > diff --git a/libavutil/boundingbox.h b/libavutil/boundingbox.h > new file mode 100644 > index 0000000000..5e2fdd55e9 > --- /dev/null > +++ b/libavutil/boundingbox.h > @@ -0,0 +1,79 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#ifndef AVUTIL_BOUNDINGBOX_H > +#define AVUTIL_BOUNDINGBOX_H > + > +#include "libavutil/rational.h" > + > +typedef struct AVBoundingBox { > + /** > + * Distance in pixels from the top edge of the frame to top > + * and bottom, and from the left edge of the frame to left and > + * right, defining the bounding box. > + */ > + int top; > + int left; > + int bottom; > + int right; Is this allowed to be negative? Can/should this be size_t? > + > +#define AV_BBOX_LABEL_NAME_MAX_SIZE 32 > + > + /** > + * Detect result with confidence > + */ > + char detect_label[AV_BBOX_LABEL_NAME_MAX_SIZE]; > + AVRational detect_confidence; > + > + /** > + * At most 4 classifications based on the detected bounding box. > + * For example, we can get max 4 different attributes with 4 different > + * DNN models on one bounding box. > + * classify_count is zero if no classification. > + */ > +#define AV_NUM_BBOX_CLASSIFY 4 > + uint32_t classify_count; size_t? > + char classify_labels[AV_NUM_BBOX_CLASSIFY][AV_BBOX_LABEL_NAME_MAX_SIZE]; > + AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; > +} AVBoundingBox; > + > +typedef struct AVBoundingBoxHeader { This structure is not extensible in an ABI-compatible way. > + /** > + * Information about how the bounding box is generated. > + * for example, the DNN model name. > + */ > + char source[128]; > + > + /** > + * The size of frame when it is detected. > + */ > + int frame_width; > + int frame_height; Why? > + > + /** > + * Number of bounding boxes. > + */ > + uint32_t nb_bbox; size_t?
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Anton > Khirnov > Sent: 2021年4月1日 16:13 > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > AV_FRAME_DATA_BOUNDING_BOXES > > Quoting Guo, Yejun (2021-03-26 09:09:29) > > Signed-off-by: Guo, Yejun <yejun.guo@intel.com> > > --- > > doc/APIchanges | 2 ++ > > libavutil/Makefile | 1 + > > libavutil/boundingbox.h | 79 > +++++++++++++++++++++++++++++++++++++++++ > > libavutil/frame.c | 1 + > > libavutil/frame.h | 7 ++++ > > 5 files changed, 90 insertions(+) > > + > > +#ifndef AVUTIL_BOUNDINGBOX_H > > +#define AVUTIL_BOUNDINGBOX_H > > + > > +#include "libavutil/rational.h" > > + > > +typedef struct AVBoundingBox { > > + /** > > + * Distance in pixels from the top edge of the frame to top > > + * and bottom, and from the left edge of the frame to left and > > + * right, defining the bounding box. > > + */ > > + int top; > > + int left; > > + int bottom; > > + int right; > > Is this allowed to be negative? Can/should this be size_t? There was a long discussion about size_t/int/uint32_t when I added struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. > > > + > > +#define AV_BBOX_LABEL_NAME_MAX_SIZE 32 > > + > > + /** > > + * Detect result with confidence > > + */ > > + char detect_label[AV_BBOX_LABEL_NAME_MAX_SIZE]; > > + AVRational detect_confidence; > > + > > + /** > > + * At most 4 classifications based on the detected bounding box. > > + * For example, we can get max 4 different attributes with 4 different > > + * DNN models on one bounding box. > > + * classify_count is zero if no classification. > > + */ > > +#define AV_NUM_BBOX_CLASSIFY 4 > > + uint32_t classify_count; > > size_t? > > > + char > classify_labels[AV_NUM_BBOX_CLASSIFY][AV_BBOX_LABEL_NAME_MAX_SIZE]; > > + AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; > > +} AVBoundingBox; > > + > > +typedef struct AVBoundingBoxHeader { > > This structure is not extensible in an ABI-compatible way. yes, we can add a version number (enum AVBBoxHeaderVersion, see below) at the beginning of the struct, and the user needs to set it with AV_BBOX_HEADER_CURRENT. It is not elegant, is this what we really want? enum AVBBoxHeaderVersion { AV_BBOX_HEADER_NONE = 0, AV_BBOX_HEADER_V1, AV_BBOX_HEADER_CURRENT = AV_BBOX_HEADER_V1, }; typedef struct AVBoundingBoxHeader { enum AVBBoxHeaderVersion version; union { AVBoundingBoxHeaderV1 v1; } header; } AVBoundingBoxHeader; We have carefully thought/discussed about the struct for months, and there's little chance for later update, imho. > > > + /** > > + * Information about how the bounding box is generated. > > + * for example, the DNN model name. > > + */ > > + char source[128]; > > + > > + /** > > + * The size of frame when it is detected. > > + */ > > + int frame_width; > > + int frame_height; > > Why? so we know if the value of top/left/bottom/right in AVBoundingBox can be used directly in other later filters. for example, the filter chain is: dnn_detect -> scale -> dnn_classify (in plan) In filter dnn_classify, it needs to check the frame size to know if the values in bounding boxes (the detection result) are still valid. > > > + > > + /** > > + * Number of bounding boxes. > > + */ > > + uint32_t nb_bbox; > > size_t? > > -- > Anton Khirnov > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org > with subject "unsubscribe".
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Guo, > Yejun > Sent: 2021年4月1日 19:32 > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > AV_FRAME_DATA_BOUNDING_BOXES > > > > > -----Original Message----- > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of > Anton > > Khirnov > > Sent: 2021年4月1日 16:13 > > To: FFmpeg development discussions and patches > <ffmpeg-devel@ffmpeg.org> > > Subject: Re: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > > AV_FRAME_DATA_BOUNDING_BOXES > > > > Quoting Guo, Yejun (2021-03-26 09:09:29) > > > Signed-off-by: Guo, Yejun <yejun.guo@intel.com> > > > --- > > > doc/APIchanges | 2 ++ > > > libavutil/Makefile | 1 + > > > libavutil/boundingbox.h | 79 > > +++++++++++++++++++++++++++++++++++++++++ > > > libavutil/frame.c | 1 + > > > libavutil/frame.h | 7 ++++ > > > 5 files changed, 90 insertions(+) > > > + > > > +#ifndef AVUTIL_BOUNDINGBOX_H > > > +#define AVUTIL_BOUNDINGBOX_H > > > + > > > +#include "libavutil/rational.h" > > > + > > > +typedef struct AVBoundingBox { > > > + /** > > > + * Distance in pixels from the top edge of the frame to top > > > + * and bottom, and from the left edge of the frame to left and > > > + * right, defining the bounding box. > > > + */ > > > + int top; > > > + int left; > > > + int bottom; > > > + int right; > > > > Is this allowed to be negative? Can/should this be size_t? > > There was a long discussion about size_t/int/uint32_t when I added > struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. > > > > > > + > > > +#define AV_BBOX_LABEL_NAME_MAX_SIZE 32 > > > + > > > + /** > > > + * Detect result with confidence > > > + */ > > > + char detect_label[AV_BBOX_LABEL_NAME_MAX_SIZE]; > > > + AVRational detect_confidence; > > > + > > > + /** > > > + * At most 4 classifications based on the detected bounding box. > > > + * For example, we can get max 4 different attributes with 4 different > > > + * DNN models on one bounding box. > > > + * classify_count is zero if no classification. > > > + */ > > > +#define AV_NUM_BBOX_CLASSIFY 4 > > > + uint32_t classify_count; > > > > size_t? > > > > > + char > > > classify_labels[AV_NUM_BBOX_CLASSIFY][AV_BBOX_LABEL_NAME_MAX_SIZE]; > > > + AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; > > > +} AVBoundingBox; > > > + > > > +typedef struct AVBoundingBoxHeader { > > > > This structure is not extensible in an ABI-compatible way. > > yes, we can add a version number (enum AVBBoxHeaderVersion, see below) at > the > beginning of the struct, and the user needs to set it with > AV_BBOX_HEADER_CURRENT. > It is not elegant, is this what we really want? > > enum AVBBoxHeaderVersion { > AV_BBOX_HEADER_NONE = 0, > AV_BBOX_HEADER_V1, > AV_BBOX_HEADER_CURRENT = AV_BBOX_HEADER_V1, > }; > > typedef struct AVBoundingBoxHeader { > enum AVBBoxHeaderVersion version; > union { > AVBoundingBoxHeaderV1 v1; > } header; > } AVBoundingBoxHeader; > > > We have carefully thought/discussed about the struct for months, and there's > little chance for later update, imho. And my opinion is to still use the code in this patch. For the worst case, if we need to change the struct some time later, we still have a timing before the next release 4.5. > > > > > > > + /** > > > + * Information about how the bounding box is generated. > > > + * for example, the DNN model name. > > > + */ > > > + char source[128]; > > > + > > > + /** > > > + * The size of frame when it is detected. > > > + */ > > > + int frame_width; > > > + int frame_height; > > > > Why? > > so we know if the value of top/left/bottom/right in AVBoundingBox > can be used directly in other later filters. > > for example, the filter chain is: dnn_detect -> scale -> dnn_classify (in plan) > > In filter dnn_classify, it needs to check the frame size to know if the values > in bounding boxes (the detection result) are still valid. > > > > > > + > > > + /** > > > + * Number of bounding boxes. > > > + */ > > > + uint32_t nb_bbox; > > > > size_t? > > > > -- > > Anton Khirnov > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org > > with subject "unsubscribe". > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Guo, Yejun (12021-04-01): > > Is this allowed to be negative? Can/should this be size_t? > There was a long discussion about size_t/int/uint32_t when I added > struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. Then at least unsigned. > yes, we can add a version number (enum AVBBoxHeaderVersion, see below) at the > beginning of the struct, and the user needs to set it with AV_BBOX_HEADER_CURRENT. > It is not elegant, is this what we really want? A version number does not make the structure compatible. At best, applications check the version and detect they do not support this one and report an error. At worse, they do not bother to check and it causes strange bugs. The version number does not help with compatibility, it just makes it detectable. To make the structure compatible, we have to ask ourselves: What happens if we update it? What breaks compatibility? Once we have an answer, we find a workaround. In this case, what happens is that the variable array must be at the end, and therefore its offset changes. And we cannot have a second variable array (like the name! you had to set a constant size), and we cannot update the type of the array elements. And the solution becomes obvious: let us store the offsets in the structure. So, that would be: typedef struct AVBoundingBoxHeader { ... /** * Offset of the array of bounding boxes. */ size_t bboxes_offset; /** * Offset increment in the array of bounding boxes. */ size_t bboxes_step; }; Note that with that solution, we do not need the empty array, we can do this: AVBoundingBoxHeader *ff_bounding_box_alloc(size_t nb_bbox) { struct { AVBoundingBoxHeader header; AVBoundingBox boxes[nb_bbox]; } *ret; ret = av_mallocz(sizeof(*ret)); /* add error checks */ ret->header->bboxes_offset = (char *)&ret->boxes - (char *)ret->header; ret->header->bboxes_step = sizeof(*ret->boxes); } #define AV_BOUNDING_BOX_GET(header, idx) \ ((AVBoundingBox *)((char *)(header) + (header)->bboxes_offset + (idx) * (header)->bboxes_step)) You can do the same to lift the 128 limit on the name. Regards,
On 4/4/2021 7:01 AM, Nicolas George wrote: > Guo, Yejun (12021-04-01): >>> Is this allowed to be negative? Can/should this be size_t? >> There was a long discussion about size_t/int/uint32_t when I added >> struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. > > Then at least unsigned. > >> yes, we can add a version number (enum AVBBoxHeaderVersion, see below) at the >> beginning of the struct, and the user needs to set it with AV_BBOX_HEADER_CURRENT. >> It is not elegant, is this what we really want? > > A version number does not make the structure compatible. At best, > applications check the version and detect they do not support this one > and report an error. At worse, they do not bother to check and it causes > strange bugs. The version number does not help with compatibility, it > just makes it detectable. > > To make the structure compatible, we have to ask ourselves: What happens > if we update it? What breaks compatibility? Once we have an answer, we > find a workaround. > > In this case, what happens is that the variable array must be at the > end, and therefore its offset changes. And we cannot have a second > variable array (like the name! you had to set a constant size), and we > cannot update the type of the array elements. > > And the solution becomes obvious: let us store the offsets in the > structure. > > So, that would be: > > typedef struct AVBoundingBoxHeader { > ... > /** > * Offset of the array of bounding boxes. > */ > size_t bboxes_offset; > > /** > * Offset increment in the array of bounding boxes. > */ > size_t bboxes_step; > }; > > Note that with that solution, we do not need the empty array, we can do > this: > > AVBoundingBoxHeader *ff_bounding_box_alloc(size_t nb_bbox) > { > struct { > AVBoundingBoxHeader header; > AVBoundingBox boxes[nb_bbox]; > } *ret; > ret = av_mallocz(sizeof(*ret)); > /* add error checks */ > ret->header->bboxes_offset = (char *)&ret->boxes - (char *)ret->header; > ret->header->bboxes_step = sizeof(*ret->boxes); > } > > #define AV_BOUNDING_BOX_GET(header, idx) \ > ((AVBoundingBox *)((char *)(header) + (header)->bboxes_offset + (idx) * (header)->bboxes_step)) This solution is what was used for video_enc_params.h, so i agree it should be used here too. What's missing is a check for idx < nb_bbox before accessing the offset in question, so an inline function instead of a #define with a check for the above would be needed. And since both are used as frame side data, it would be ideal that the signature for the public helpers on both are the same (The standard alloc, and the alloc + wrapping into frame side data ones). > > You can do the same to lift the 128 limit on the name. > > Regards, > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
Nicolas George: > Guo, Yejun (12021-04-01): >>> Is this allowed to be negative? Can/should this be size_t? >> There was a long discussion about size_t/int/uint32_t when I added >> struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. > > Then at least unsigned. > >> yes, we can add a version number (enum AVBBoxHeaderVersion, see below) at the >> beginning of the struct, and the user needs to set it with AV_BBOX_HEADER_CURRENT. >> It is not elegant, is this what we really want? > > A version number does not make the structure compatible. At best, > applications check the version and detect they do not support this one > and report an error. At worse, they do not bother to check and it causes > strange bugs. The version number does not help with compatibility, it > just makes it detectable. > > To make the structure compatible, we have to ask ourselves: What happens > if we update it? What breaks compatibility? Once we have an answer, we > find a workaround. > > In this case, what happens is that the variable array must be at the > end, and therefore its offset changes. And we cannot have a second > variable array (like the name! you had to set a constant size), and we > cannot update the type of the array elements. > > And the solution becomes obvious: let us store the offsets in the > structure. > > So, that would be: > > typedef struct AVBoundingBoxHeader { > ... > /** > * Offset of the array of bounding boxes. > */ > size_t bboxes_offset; > > /** > * Offset increment in the array of bounding boxes. > */ > size_t bboxes_step; > }; > > Note that with that solution, we do not need the empty array, we can do > this: > > AVBoundingBoxHeader *ff_bounding_box_alloc(size_t nb_bbox) > { > struct { > AVBoundingBoxHeader header; > AVBoundingBox boxes[nb_bbox]; That's a variable-length array. That's a C99 feature we do not require. > } *ret; > ret = av_mallocz(sizeof(*ret)); > /* add error checks */ > ret->header->bboxes_offset = (char *)&ret->boxes - (char *)ret->header; > ret->header->bboxes_step = sizeof(*ret->boxes); > } > > #define AV_BOUNDING_BOX_GET(header, idx) \ > ((AVBoundingBox *)((char *)(header) + (header)->bboxes_offset + (idx) * (header)->bboxes_step)) > > You can do the same to lift the 128 limit on the name. > > Regards, > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
James Almer: > On 4/4/2021 7:01 AM, Nicolas George wrote: >> Guo, Yejun (12021-04-01): >>>> Is this allowed to be negative? Can/should this be size_t? >>> There was a long discussion about size_t/int/uint32_t when I added >>> struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. >> >> Then at least unsigned. >> >>> yes, we can add a version number (enum AVBBoxHeaderVersion, see >>> below) at the >>> beginning of the struct, and the user needs to set it with >>> AV_BBOX_HEADER_CURRENT. >>> It is not elegant, is this what we really want? >> >> A version number does not make the structure compatible. At best, >> applications check the version and detect they do not support this one >> and report an error. At worse, they do not bother to check and it causes >> strange bugs. The version number does not help with compatibility, it >> just makes it detectable. >> >> To make the structure compatible, we have to ask ourselves: What happens >> if we update it? What breaks compatibility? Once we have an answer, we >> find a workaround. >> >> In this case, what happens is that the variable array must be at the >> end, and therefore its offset changes. And we cannot have a second >> variable array (like the name! you had to set a constant size), and we >> cannot update the type of the array elements. >> >> And the solution becomes obvious: let us store the offsets in the >> structure. >> >> So, that would be: >> >> typedef struct AVBoundingBoxHeader { >> ... >> /** >> * Offset of the array of bounding boxes. >> */ >> size_t bboxes_offset; >> >> /** >> * Offset increment in the array of bounding boxes. >> */ >> size_t bboxes_step; >> }; >> >> Note that with that solution, we do not need the empty array, we can do >> this: >> >> AVBoundingBoxHeader *ff_bounding_box_alloc(size_t nb_bbox) >> { >> struct { >> AVBoundingBoxHeader header; >> AVBoundingBox boxes[nb_bbox]; >> } *ret; >> ret = av_mallocz(sizeof(*ret)); >> /* add error checks */ >> ret->header->bboxes_offset = (char *)&ret->boxes - (char >> *)ret->header; >> ret->header->bboxes_step = sizeof(*ret->boxes); >> } >> >> #define AV_BOUNDING_BOX_GET(header, idx) \ >> ((AVBoundingBox *)((char *)(header) + (header)->bboxes_offset + >> (idx) * (header)->bboxes_step)) > > This solution is what was used for video_enc_params.h, so i agree it > should be used here too. What's missing is a check for idx < nb_bbox Actually nothing in video_enc_params.c guarantees proper alignment for AVVideoBlockParams. If we added a type requiring 64bit alignment to AVVideoBlockParams and are on a 32bit system, then the blocks will be misaligned. > before accessing the offset in question, so an inline function instead > of a #define with a check for the above would be needed. > > And since both are used as frame side data, it would be ideal that the > signature for the public helpers on both are the same (The standard > alloc, and the alloc + wrapping into frame side data ones). > >> >> You can do the same to lift the 128 limit on the name. >> >> Regards, >> >> >> _______________________________________________ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> >> To unsubscribe, visit link above, or email >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >> > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
James Almer (12021-04-04): > This solution is what was used for video_enc_params.h, so i agree it should > be used here too. What's missing is a check for idx < nb_bbox before > accessing the offset in question, so an inline function instead of a #define > with a check for the above would be needed. That may be a good idea, but please notice that this check would not be there with just an array or a pointer. FFmpeg is C, not Java. We do not HAVE to add the check. > And since both are used as frame side data, it would be ideal that the > signature for the public helpers on both are the same (The standard alloc, > and the alloc + wrapping into frame side data ones). I agree with this. Regards,
Andreas Rheinhardt (12021-04-04):
> That's a variable-length array. That's a C99 feature we do not require.
We have used variable-length arrays in the past. We have eliminated them
from our code base not because lack of support, IIRC, but because they
have a tendency to ruin compiler optimization.
This is not a variable-length array but a pointer to a variable-length
array, it does not cause the optimization problems. As it is, it is just
a trick to let the compiler compute offsets for us taking into account
alignment requirements.
Regards,
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Nicolas > George > Sent: 2021年4月4日 18:02 > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > AV_FRAME_DATA_BOUNDING_BOXES > > Guo, Yejun (12021-04-01): > > > Is this allowed to be negative? Can/should this be size_t? > > There was a long discussion about size_t/int/uint32_t when I added > > struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. > > Then at least unsigned. yes, 'unsigned int' is another option. but imho, it might be better to use the same data type of AVFrame.width/height, they are 'int'. > > > yes, we can add a version number (enum AVBBoxHeaderVersion, see below) > at the > > beginning of the struct, and the user needs to set it with > AV_BBOX_HEADER_CURRENT. > > It is not elegant, is this what we really want? > > A version number does not make the structure compatible. At best, > applications check the version and detect they do not support this one > and report an error. At worse, they do not bother to check and it causes > strange bugs. The version number does not help with compatibility, it > just makes it detectable. My idea was to use separate code path for AVBoundingBoxHeaderV1, AVBoundingBoxHeaderV2, ... (not union in my previous email) according to the version number. And yes, it doesn't work if the user does not set or check the version number correctly. > > To make the structure compatible, we have to ask ourselves: What happens > if we update it? What breaks compatibility? Once we have an answer, we > find a workaround. > > In this case, what happens is that the variable array must be at the > end, and therefore its offset changes. And we cannot have a second > variable array (like the name! you had to set a constant size), and we > cannot update the type of the array elements. > > And the solution becomes obvious: let us store the offsets in the > structure. Thanks a lot for your nice explanation! thank you. > > So, that would be: > > typedef struct AVBoundingBoxHeader { > ... > /** > * Offset of the array of bounding boxes. > */ > size_t bboxes_offset; > > /** > * Offset increment in the array of bounding boxes. > */ > size_t bboxes_step; > }; > > Note that with that solution, we do not need the empty array, we can do > this: > > AVBoundingBoxHeader *ff_bounding_box_alloc(size_t nb_bbox) > { > struct { > AVBoundingBoxHeader header; > AVBoundingBox boxes[nb_bbox]; > } *ret; > ret = av_mallocz(sizeof(*ret)); > /* add error checks */ > ret->header->bboxes_offset = (char *)&ret->boxes - (char *)ret->header; > ret->header->bboxes_step = sizeof(*ret->boxes); > } > > #define AV_BOUNDING_BOX_GET(header, idx) \ > ((AVBoundingBox *)((char *)(header) + (header)->bboxes_offset + (idx) * > (header)->bboxes_step)) thanks, and I'll update the naming as what James mentioned. > > You can do the same to lift the 128 limit on the name. yes, we can now handle more variable arrays with this method. For the special case 'char source[128]' in AVBoundingBoxHeader, it will be written for several times, for example, vf_dnn_detect.c writes model name of detection into source, and later, vf_dnn_classify.c (in plan) will append the model name of classification into source. So, source will be at the end of the side data, and the side data will be reallocated when needed. > > Regards, > > -- > Nicolas George
> -----Original Message----- > From: Guo, Yejun > Sent: 2021年4月5日 17:56 > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: RE: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > AV_FRAME_DATA_BOUNDING_BOXES > > > > > -----Original Message----- > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of > Nicolas > > George > > Sent: 2021年4月4日 18:02 > > To: FFmpeg development discussions and patches > <ffmpeg-devel@ffmpeg.org> > > Subject: Re: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > > AV_FRAME_DATA_BOUNDING_BOXES > > > > Guo, Yejun (12021-04-01): > > > > Is this allowed to be negative? Can/should this be size_t? > > > There was a long discussion about size_t/int/uint32_t when I added > > > struct AVRegionOfInterest in frame.h, and finally size_t is not chosen. > > > > Then at least unsigned. > > yes, 'unsigned int' is another option. but imho, it might be better to use > the same data type of AVFrame.width/height, they are 'int'. > > > > > > yes, we can add a version number (enum AVBBoxHeaderVersion, see below) > > at the > > > beginning of the struct, and the user needs to set it with > > AV_BBOX_HEADER_CURRENT. > > > It is not elegant, is this what we really want? > > > > A version number does not make the structure compatible. At best, > > applications check the version and detect they do not support this one > > and report an error. At worse, they do not bother to check and it causes > > strange bugs. The version number does not help with compatibility, it > > just makes it detectable. > > My idea was to use separate code path for AVBoundingBoxHeaderV1, > AVBoundingBoxHeaderV2, ... (not union in my previous email) according to > the version number. And yes, it doesn't work if the user does not set or > check the version number correctly. > > > > > To make the structure compatible, we have to ask ourselves: What happens > > if we update it? What breaks compatibility? Once we have an answer, we > > find a workaround. > > > > In this case, what happens is that the variable array must be at the > > end, and therefore its offset changes. And we cannot have a second > > variable array (like the name! you had to set a constant size), and we > > cannot update the type of the array elements. > > > > And the solution becomes obvious: let us store the offsets in the > > structure. > > Thanks a lot for your nice explanation! thank you. > > > > > So, that would be: > > > > typedef struct AVBoundingBoxHeader { > > ... > > /** > > * Offset of the array of bounding boxes. > > */ > > size_t bboxes_offset; > > > > /** > > * Offset increment in the array of bounding boxes. > > */ > > size_t bboxes_step; > > }; > > > > Note that with that solution, we do not need the empty array, we can do > > this: > > > > AVBoundingBoxHeader *ff_bounding_box_alloc(size_t nb_bbox) > > { > > struct { > > AVBoundingBoxHeader header; > > AVBoundingBox boxes[nb_bbox]; > > } *ret; > > ret = av_mallocz(sizeof(*ret)); > > /* add error checks */ > > ret->header->bboxes_offset = (char *)&ret->boxes - (char *)ret->header; > > ret->header->bboxes_step = sizeof(*ret->boxes); > > } > > > > #define AV_BOUNDING_BOX_GET(header, idx) \ > > ((AVBoundingBox *)((char *)(header) + (header)->bboxes_offset + (idx) * > > (header)->bboxes_step)) > > thanks, and I'll update the naming as what James mentioned. > > > > > You can do the same to lift the 128 limit on the name. > > yes, we can now handle more variable arrays with this method. Looks that we could not support more variable arrays without requiring special compiler feature. See the sample code below, and I got compile error on my ubuntu 18.04 with default compiler and default configure option. sample code: AVBoundingBoxHeader * av_bbox_alloc(uint32_t nb_bboxes, size_t name_size, size_t *out_size) { struct { AVBoundingBoxHeader header; AVBoundingBox boxes[nb_bboxes]; char name[name_size]; } *ret; ... } compile error on ubuntu 18.04 with default setting: error: ISO C90 forbids variable length array ‘boxes’ [-Werror=vla] AVBoundingBox boxes[nb_bboxes]; ... So, I might still use 'char source[128]' in AVBoundingBoxHeader, and use the following code to meet the alignment requirement. struct { AVBoundingBoxHeader header; AVBoundingBox boxes[]; } *ret; ret = av_mallocz(sizeof(*ret) + sizeof(AVBoundingBox) * nb_bboxes);
Guo, Yejun (12021-04-06): > compile error on ubuntu 18.04 with default setting: > error: ISO C90 forbids variable length array ‘boxes’ [-Werror=vla] > AVBoundingBox boxes[nb_bboxes]; This warning is explicitly added by configure it, we do not have to keep it if it hiders us. If people agree with my analysis that pointers to VLA used to compute sizes and offsets in an isolated function are not a problem, unlike actual VLAs on the stack, then we can just remove it. I do not think we are at risk of adding VLAs by mistake. Regards,
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Nicolas > George > Sent: 2021年4月7日 0:46 > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > AV_FRAME_DATA_BOUNDING_BOXES > > Guo, Yejun (12021-04-06): > > compile error on ubuntu 18.04 with default setting: > > error: ISO C90 forbids variable length array ‘boxes’ [-Werror=vla] > > AVBoundingBox boxes[nb_bboxes]; > > This warning is explicitly added by configure it, we do not have to keep > it if it hiders us. > > If people agree with my analysis that pointers to VLA used to compute > sizes and offsets in an isolated function are not a problem, unlike > actual VLAs on the stack, then we can just remove it. I do not think we > are at risk of adding VLAs by mistake. I see, thanks. @Michael, any comment on this, thanks, '-Werror=vla' is enabled by 293e5423 in 2012. I can send a patch to remove this in configure for more comments. Then, I can update my detection patch set according to the result.
On Tue, Apr 6, 2021 at 6:45 PM Nicolas George <george@nsup.org> wrote: > > Guo, Yejun (12021-04-06): > > compile error on ubuntu 18.04 with default setting: > > error: ISO C90 forbids variable length array ‘boxes’ [-Werror=vla] > > AVBoundingBox boxes[nb_bboxes]; > > This warning is explicitly added by configure it, we do not have to keep > it if it hiders us. > > If people agree with my analysis that pointers to VLA used to compute > sizes and offsets in an isolated function are not a problem, unlike > actual VLAs on the stack, then we can just remove it. I do not think we > are at risk of adding VLAs by mistake. > Unfortunately, this is not correct. I don't know about optimization problems, but I do know that compilers we do support, which are C11 compliant (where VLAs are optional), do not implement them (just confirmed this with a test, as well, to make sure). - Hendrik
Hendrik Leppkes (12021-04-07): > Unfortunately, this is not correct. I don't know about optimization > problems, but I do know that compilers we do support, which are C11 > compliant (where VLAs are optional), do not implement them (just > confirmed this with a test, as well, to make sure). Too bad. Can you document which compilers they are? This is an information that should be centralized. We will have have a backup implementation. Regards,
Nicolas George: > Hendrik Leppkes (12021-04-07): >> Unfortunately, this is not correct. I don't know about optimization >> problems, but I do know that compilers we do support, which are C11 >> compliant (where VLAs are optional), do not implement them (just >> confirmed this with a test, as well, to make sure). > > Too bad. > > Can you document which compilers they are? This is an information that > should be centralized. > > We will have have a backup implementation. > Why a backup implementation? Why not just avoid VLAs altogether (I don't like using features that are optional in modern standards)? All one needs to calculate a suitable offset is the alignment of AVBoundingBox. This can be done via _Alignof for C11 compilers and via compiler extensions for lots of other compilers (e.g. GCC supports __alignof__ since at least 3.1). And a generic alignment of (say) 8 can be used as a fallback. - Andreas
Andreas Rheinhardt (12021-04-07): > Why a backup implementation? Because: > And a generic alignment of (say) 8 can be used as a fallback. I believe using a pointer to VLA is more elegant than reimplementing the compiler's feature using _Alignof and clumsy arithmetic, but it is eventually for whoever writes it to decide. Also, assuming the alignment is the same as the size is rather a safe assumption for elementary types. Regards,
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Nicolas > George > Sent: 2021年4月7日 18:06 > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH V6 4/6] lavu: add side data > AV_FRAME_DATA_BOUNDING_BOXES > > Andreas Rheinhardt (12021-04-07): > > Why a backup implementation? > > Because: > > > And a generic alignment of (say) 8 can be used as a fallback. > > I believe using a pointer to VLA is more elegant than reimplementing the > compiler's feature using _Alignof and clumsy arithmetic, but it is > eventually for whoever writes it to decide. > > Also, assuming the alignment is the same as the size is rather a safe > assumption for elementary types. > Thanks all, I might choose a simple and straight-forward method, instead of using align keyword.
diff --git a/doc/APIchanges b/doc/APIchanges index b41dadee8d..160b020d9d 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,8 @@ libavutil: 2017-10-21 API changes, most recent first: +2021-03-xx - xxxxxxxxxx - lavu 56.xx.100 - frame.h boundingbox.h + Add AV_FRAME_DATA_BOUNDING_BOXES 2021-03-21 - xxxxxxxxxx - lavu 56.72.100 - frame.h Deprecated av_get_colorspace_name(). diff --git a/libavutil/Makefile b/libavutil/Makefile index 27bafe9e12..7e1dc713ac 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -11,6 +11,7 @@ HEADERS = adler32.h \ avutil.h \ base64.h \ blowfish.h \ + boundingbox.h \ bprint.h \ bswap.h \ buffer.h \ diff --git a/libavutil/boundingbox.h b/libavutil/boundingbox.h new file mode 100644 index 0000000000..5e2fdd55e9 --- /dev/null +++ b/libavutil/boundingbox.h @@ -0,0 +1,79 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_BOUNDINGBOX_H +#define AVUTIL_BOUNDINGBOX_H + +#include "libavutil/rational.h" + +typedef struct AVBoundingBox { + /** + * Distance in pixels from the top edge of the frame to top + * and bottom, and from the left edge of the frame to left and + * right, defining the bounding box. + */ + int top; + int left; + int bottom; + int right; + +#define AV_BBOX_LABEL_NAME_MAX_SIZE 32 + + /** + * Detect result with confidence + */ + char detect_label[AV_BBOX_LABEL_NAME_MAX_SIZE]; + AVRational detect_confidence; + + /** + * At most 4 classifications based on the detected bounding box. + * For example, we can get max 4 different attributes with 4 different + * DNN models on one bounding box. + * classify_count is zero if no classification. + */ +#define AV_NUM_BBOX_CLASSIFY 4 + uint32_t classify_count; + char classify_labels[AV_NUM_BBOX_CLASSIFY][AV_BBOX_LABEL_NAME_MAX_SIZE]; + AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; +} AVBoundingBox; + +typedef struct AVBoundingBoxHeader { + /** + * Information about how the bounding box is generated. + * for example, the DNN model name. + */ + char source[128]; + + /** + * The size of frame when it is detected. + */ + int frame_width; + int frame_height; + + /** + * Number of bounding boxes. + */ + uint32_t nb_bbox; + + /** + * Pointer to the array of AVBoundingBox. + */ + AVBoundingBox bboxes[]; +} AVBoundingBoxHeader; + +#endif diff --git a/libavutil/frame.c b/libavutil/frame.c index 31a2117b82..99080af777 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -853,6 +853,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) case AV_FRAME_DATA_VIDEO_ENC_PARAMS: return "Video encoding parameters"; case AV_FRAME_DATA_SEI_UNREGISTERED: return "H.26[45] User Data Unregistered SEI message"; case AV_FRAME_DATA_FILM_GRAIN_PARAMS: return "Film grain parameters"; + case AV_FRAME_DATA_BOUNDING_BOXES: return "Bounding boxes"; } return NULL; } diff --git a/libavutil/frame.h b/libavutil/frame.h index a5ed91b20a..9b7145199c 100644 --- a/libavutil/frame.h +++ b/libavutil/frame.h @@ -198,6 +198,13 @@ enum AVFrameSideDataType { * Must be present for every frame which should have film grain applied. */ AV_FRAME_DATA_FILM_GRAIN_PARAMS, + + /** + * Bounding boxes for object detection and classification, the data is a BoundingBoxHeader + * followed with an array of BoudingBox, and BoundingBoxHeader.nb_bbox is the number of + * array element. + */ + AV_FRAME_DATA_BOUNDING_BOXES, }; enum AVActiveFormatDescription {
Signed-off-by: Guo, Yejun <yejun.guo@intel.com> --- doc/APIchanges | 2 ++ libavutil/Makefile | 1 + libavutil/boundingbox.h | 79 +++++++++++++++++++++++++++++++++++++++++ libavutil/frame.c | 1 + libavutil/frame.h | 7 ++++ 5 files changed, 90 insertions(+) create mode 100644 libavutil/boundingbox.h