diff mbox series

[FFmpeg-devel,v4,03/21] cbs: Describe allocate/free methods in tabular form

Message ID 20200223234124.17689-3-sw@jkqxz.net
State New
Headers show
Series [FFmpeg-devel,v4,01/21] cbs: Mention all codecs in unit type comment | expand

Checks

Context Check Description
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

Mark Thompson Feb. 23, 2020, 11:41 p.m. UTC
Unit types are split into three categories, depending on how their
content is managed:
* POD structure - these require no special treatment.
* Structure containing references to refcounted buffers - these can use
  a common free function when the offsets of all the internal references
  are known.
* More complex structures - these still require ad-hoc treatment.

For each codec we can then maintain a table of descriptors for each set of
equivalent unit types, defining the mechanism needed to allocate/free that
unit content.  This is not required to be used immediately - a new alloc
function supports this, but does not replace the old one which works without
referring to these tables.
---
 libavcodec/cbs.c          | 69 +++++++++++++++++++++++++++++++++++++++
 libavcodec/cbs.h          |  9 +++++
 libavcodec/cbs_internal.h | 60 ++++++++++++++++++++++++++++++++++
 3 files changed, 138 insertions(+)

Comments

Andreas Rheinhardt Feb. 24, 2020, 5:19 p.m. UTC | #1
Mark Thompson:
> Unit types are split into three categories, depending on how their
> content is managed:
> * POD structure - these require no special treatment.
> * Structure containing references to refcounted buffers - these can use
>   a common free function when the offsets of all the internal references
>   are known.
> * More complex structures - these still require ad-hoc treatment.
> 
> For each codec we can then maintain a table of descriptors for each set of
> equivalent unit types, defining the mechanism needed to allocate/free that
> unit content.  This is not required to be used immediately - a new alloc
> function supports this, but does not replace the old one which works without
> referring to these tables.
> ---
>  libavcodec/cbs.c          | 69 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/cbs.h          |  9 +++++
>  libavcodec/cbs_internal.h | 60 ++++++++++++++++++++++++++++++++++
>  3 files changed, 138 insertions(+)
> 
> diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
> index 0bd5e1ac5d..6cc559e545 100644
> --- a/libavcodec/cbs.c
> +++ b/libavcodec/cbs.c
> @@ -812,3 +812,72 @@ void ff_cbs_delete_unit(CodedBitstreamContext *ctx,
>                  frag->units + position + 1,
>                  (frag->nb_units - position) * sizeof(*frag->units));
>  }
> +
> +static void cbs_default_free_unit_content(void *opaque, uint8_t *data)
> +{
> +    const CodedBitstreamUnitTypeDescriptor *desc = opaque;
> +    if (desc->content_type == CBS_CONTENT_TYPE_INTERNAL_REFS) {
> +        int i;
> +        for (i = 0; i < desc->nb_ref_offsets; i++) {
> +            void **ptr = (void**)(data + desc->ref_offsets[i]);
> +            av_buffer_unref((AVBufferRef**)(ptr + 1));
> +        }
> +    }
> +    av_free(data);
> +}
> +
> +static const CodedBitstreamUnitTypeDescriptor
> +    *cbs_find_unit_type_desc(CodedBitstreamContext *ctx,
> +                             CodedBitstreamUnit *unit)
> +{
> +    const CodedBitstreamUnitTypeDescriptor *desc;
> +    int i, j;
> +
> +    if (!ctx->codec->unit_types)
> +        return NULL;
> +
> +    for (i = 0;; i++) {
> +        desc = &ctx->codec->unit_types[i];
> +        if (desc->nb_unit_types == 0)
> +            break;
> +        if (desc->nb_unit_types == CBS_UNIT_TYPE_RANGE) {
> +            if (unit->type >= desc->unit_type_range_start &&
> +                unit->type <= desc->unit_type_range_end)
> +                return desc;
> +        } else {
> +            for (j = 0; j < desc->nb_unit_types; j++) {
> +                if (desc->unit_types[j] == unit->type)
> +                    return desc;
> +            }
> +        }
> +    }
> +    return NULL;
> +}
> +
> +int ff_cbs_alloc_unit_content2(CodedBitstreamContext *ctx,
> +                               CodedBitstreamUnit *unit)
> +{
> +    const CodedBitstreamUnitTypeDescriptor *desc;
> +
> +    av_assert0(!unit->content && !unit->content_ref);
> +
> +    desc = cbs_find_unit_type_desc(ctx, unit);
> +    if (!desc)
> +        return AVERROR(ENOSYS);
> +
> +    unit->content = av_mallocz(desc->content_size);
> +    if (!unit->content)
> +        return AVERROR(ENOMEM);
> +
> +    unit->content_ref =
> +        av_buffer_create(unit->content, desc->content_size,
> +                         desc->content_free ? desc->content_free
> +                                            : cbs_default_free_unit_content,
> +                         (void*)desc, 0);
> +    if (!unit->content_ref) {
> +        av_freep(&unit->content);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    return 0;
> +}
> diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h
> index cb3081e2c6..2a5959a2b0 100644
> --- a/libavcodec/cbs.h
> +++ b/libavcodec/cbs.h
> @@ -352,6 +352,15 @@ int ff_cbs_alloc_unit_content(CodedBitstreamContext *ctx,
>                                size_t size,
>                                void (*free)(void *opaque, uint8_t *content));
>  
> +/**
> + * Allocate a new internal content buffer matching the type of the unit.
> + *
> + * The content will be zeroed.
> + */
> +int ff_cbs_alloc_unit_content2(CodedBitstreamContext *ctx,
> +                               CodedBitstreamUnit *unit);
> +
> +
>  /**
>   * Allocate a new internal data buffer of the given size in the unit.
>   *
> diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
> index 4c5a535ca6..615f514a85 100644
> --- a/libavcodec/cbs_internal.h
> +++ b/libavcodec/cbs_internal.h
> @@ -25,11 +25,71 @@
>  #include "put_bits.h"
>  
>  
> +enum {
> +    // Unit content is a simple structure.
> +    CBS_CONTENT_TYPE_POD,
> +    // Unit content contains some references to other structures, but all
> +    // managed via buffer reference counting.  The descriptor defines the
> +    // structure offsets of every buffer reference.
> +    CBS_CONTENT_TYPE_INTERNAL_REFS,
> +    // Unit content is something more complex.  The descriptor defines
> +    // special functions to manage the content.
> +    CBS_CONTENT_TYPE_COMPLEX,
> +};
> +
> +enum {
> +      // Maximum number of unit types described by the same unit type
> +      // descriptor.
> +      CBS_MAX_UNIT_TYPES = 16,

This is quite big and I wonder whether it would not be better to
simply split the HEVC-slices into two range descriptors in order to
reduce this to three.

> +      // Maximum number of reference buffer offsets in any one unit.
> +      CBS_MAX_REF_OFFSETS = 2,
> +      // Special value used in a unit type descriptor to indicate that it
> +      // applies to a large range of types rather than a set of discrete
> +      // values.
> +      CBS_UNIT_TYPE_RANGE = -1,
> +};
> +
> +typedef struct CodedBitstreamUnitTypeDescriptor {
> +    // Number of entries in the unit_types array, or the special value
> +    // CBS_UNIT_TYPE_RANGE to indicate that the range fields should be
> +    // used instead.
> +    int nb_unit_types;
> +
> +    // Array of unit types that this entry describes.
> +    const CodedBitstreamUnitType unit_types[CBS_MAX_UNIT_TYPES];
> +
> +    // Start and end of unit type range, used if nb_unit_types == 0.

nb_unit_types == 0 is actually used for the sentinel in the
CodedBitstreamUnitTypeDescriptor-array. nb_unit_types ==
CBS_UNIT_TYPE_RANGE indicates that this descriptor uses ranges.

> +    const CodedBitstreamUnitType unit_type_range_start;
> +    const CodedBitstreamUnitType unit_type_range_end;

The ranges could be free (size-wise) if you used a union with unit_types.
> +
> +    // The type of content described, from CBS_CONTENT_TYPE_*.
> +    int    content_type;

Maybe make a proper type out of the CBS_CONTENT_TYPE_*-enum and use it
here?

> +    // The size of the structure which should be allocated to contain
> +    // the decomposed content of this type of unit.
> +    size_t content_size;
> +
> +    // Number of entries in the ref_offsets array.  Only used if the
> +    // content_type is CBS_CONTENT_TYPE_INTERNAL_REFS.
> +    int nb_ref_offsets;
> +    // The structure must contain two adjacent elements:
> +    //   type        *field;
> +    //   AVBufferRef *field_ref;
> +    // where field points to something in the buffer referred to by
> +    // field_ref.  This offset is then set to offsetof(struct, field).
> +    size_t ref_offsets[CBS_MAX_REF_OFFSETS];
> +
> +    void (*content_free)(void *opaque, uint8_t *data);

Is there a usecase for a dedicated free-function different for a unit
of type CBS_CONTENT_TYPE_INTERNAL_REFS? If not, then one could use a
union for this and the ref_offset stuff.

> +} CodedBitstreamUnitTypeDescriptor;

I wonder whether you should add const to the typedef in order to omit
it everywhere else. After all, no CodedBitstreamUnitTypeDescriptor
will ever be assembled during runtime.

> +
>  typedef struct CodedBitstreamType {
>      enum AVCodecID codec_id;
>  
>      size_t priv_data_size;
>  
> +    // List of unit type descriptors for this codec.
> +    // Terminated by a descriptor with nb_unit_types equal to zero.
> +    const CodedBitstreamUnitTypeDescriptor *unit_types;
> +
>      // Split frag->data into coded bitstream units, creating the
>      // frag->units array.  Fill data but not content on each unit.
>      // The header argument should be set if the fragment came from
>
Mark Thompson Feb. 24, 2020, 9:56 p.m. UTC | #2
On 24/02/2020 17:19, Andreas Rheinhardt wrote:
> Mark Thompson:
>> Unit types are split into three categories, depending on how their
>> content is managed:
>> * POD structure - these require no special treatment.
>> * Structure containing references to refcounted buffers - these can use
>>   a common free function when the offsets of all the internal references
>>   are known.
>> * More complex structures - these still require ad-hoc treatment.
>>
>> For each codec we can then maintain a table of descriptors for each set of
>> equivalent unit types, defining the mechanism needed to allocate/free that
>> unit content.  This is not required to be used immediately - a new alloc
>> function supports this, but does not replace the old one which works without
>> referring to these tables.
>> ---
>>  libavcodec/cbs.c          | 69 +++++++++++++++++++++++++++++++++++++++
>>  libavcodec/cbs.h          |  9 +++++
>>  libavcodec/cbs_internal.h | 60 ++++++++++++++++++++++++++++++++++
>>  3 files changed, 138 insertions(+)
>>
>> ...
>> diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
>> index 4c5a535ca6..615f514a85 100644
>> --- a/libavcodec/cbs_internal.h
>> +++ b/libavcodec/cbs_internal.h
>> @@ -25,11 +25,71 @@
>>  #include "put_bits.h"
>>  
>>  
>> +enum {
>> +    // Unit content is a simple structure.
>> +    CBS_CONTENT_TYPE_POD,
>> +    // Unit content contains some references to other structures, but all
>> +    // managed via buffer reference counting.  The descriptor defines the
>> +    // structure offsets of every buffer reference.
>> +    CBS_CONTENT_TYPE_INTERNAL_REFS,
>> +    // Unit content is something more complex.  The descriptor defines
>> +    // special functions to manage the content.
>> +    CBS_CONTENT_TYPE_COMPLEX,
>> +};
>> +
>> +enum {
>> +      // Maximum number of unit types described by the same unit type
>> +      // descriptor.
>> +      CBS_MAX_UNIT_TYPES = 16,
> 
> This is quite big and I wonder whether it would not be better to
> simply split the HEVC-slices into two range descriptors in order to
> reduce this to three.

As-written, the range case is only covering the one actual range case (MPEG-2 slices).  I think it's preferable to leave the HEVC slice headers as-is, because having the full list there explicitly is much clearer.

As an alternative, would you prefer the array here to be a pointer + compound literal array?  It would avoid this constant and save few bytes of space on average, at the cost of the definitions being slightly more complex.

>> +      // Maximum number of reference buffer offsets in any one unit.
>> +      CBS_MAX_REF_OFFSETS = 2,
>> +      // Special value used in a unit type descriptor to indicate that it
>> +      // applies to a large range of types rather than a set of discrete
>> +      // values.
>> +      CBS_UNIT_TYPE_RANGE = -1,
>> +};
>> +
>> +typedef struct CodedBitstreamUnitTypeDescriptor {
>> +    // Number of entries in the unit_types array, or the special value
>> +    // CBS_UNIT_TYPE_RANGE to indicate that the range fields should be
>> +    // used instead.
>> +    int nb_unit_types;
>> +
>> +    // Array of unit types that this entry describes.
>> +    const CodedBitstreamUnitType unit_types[CBS_MAX_UNIT_TYPES];
>> +
>> +    // Start and end of unit type range, used if nb_unit_types == 0.
> 
> nb_unit_types == 0 is actually used for the sentinel in the
> CodedBitstreamUnitTypeDescriptor-array. nb_unit_types ==
> CBS_UNIT_TYPE_RANGE indicates that this descriptor uses ranges.

Fixed.

>> +    const CodedBitstreamUnitType unit_type_range_start;
>> +    const CodedBitstreamUnitType unit_type_range_end;
> 
> The ranges could be free (size-wise) if you used a union with unit_types.

Anonymous unions are still not allowed in FFmpeg, unfortunately (they are C11, though many compilers supported them before that).

>> +
>> +    // The type of content described, from CBS_CONTENT_TYPE_*.
>> +    int    content_type;
> 
> Maybe make a proper type out of the CBS_CONTENT_TYPE_*-enum and use it
> here?

That's a good idea; done.

>> +    // The size of the structure which should be allocated to contain
>> +    // the decomposed content of this type of unit.
>> +    size_t content_size;
>> +
>> +    // Number of entries in the ref_offsets array.  Only used if the
>> +    // content_type is CBS_CONTENT_TYPE_INTERNAL_REFS.
>> +    int nb_ref_offsets;
>> +    // The structure must contain two adjacent elements:
>> +    //   type        *field;
>> +    //   AVBufferRef *field_ref;
>> +    // where field points to something in the buffer referred to by
>> +    // field_ref.  This offset is then set to offsetof(struct, field).
>> +    size_t ref_offsets[CBS_MAX_REF_OFFSETS];
>> +
>> +    void (*content_free)(void *opaque, uint8_t *data);
> 
> Is there a usecase for a dedicated free-function different for a unit
> of type CBS_CONTENT_TYPE_INTERNAL_REFS? If not, then one could use a
> union for this and the ref_offset stuff.

Yes, but the same anonymous union problem.

>> +} CodedBitstreamUnitTypeDescriptor;
> 
> I wonder whether you should add const to the typedef in order to omit
> it everywhere else. After all, no CodedBitstreamUnitTypeDescriptor
> will ever be assembled during runtime.

It definitely makes sense to add it to reduce errors.  Not so sure about the removing it from everywhere else - the fact that it looks wrong at the point of use probably causes more confusion.

So, I've done the first part but not the second (helpfully, redundant type qualifiers have no effect).

>> +
>>  typedef struct CodedBitstreamType {
>>      enum AVCodecID codec_id;
>>  
>>      size_t priv_data_size;
>>  
>> +    // List of unit type descriptors for this codec.
>> +    // Terminated by a descriptor with nb_unit_types equal to zero.
>> +    const CodedBitstreamUnitTypeDescriptor *unit_types;
>> +
>>      // Split frag->data into coded bitstream units, creating the
>>      // frag->units array.  Fill data but not content on each unit.
>>      // The header argument should be set if the fragment came from
>>
Thanks,

- Mark
Andreas Rheinhardt Feb. 24, 2020, 10:10 p.m. UTC | #3
Mark Thompson:
> On 24/02/2020 17:19, Andreas Rheinhardt wrote:
>> Mark Thompson:
>>> Unit types are split into three categories, depending on how their
>>> content is managed:
>>> * POD structure - these require no special treatment.
>>> * Structure containing references to refcounted buffers - these can use
>>>   a common free function when the offsets of all the internal references
>>>   are known.
>>> * More complex structures - these still require ad-hoc treatment.
>>>
>>> For each codec we can then maintain a table of descriptors for each set of
>>> equivalent unit types, defining the mechanism needed to allocate/free that
>>> unit content.  This is not required to be used immediately - a new alloc
>>> function supports this, but does not replace the old one which works without
>>> referring to these tables.
>>> ---
>>>  libavcodec/cbs.c          | 69 +++++++++++++++++++++++++++++++++++++++
>>>  libavcodec/cbs.h          |  9 +++++
>>>  libavcodec/cbs_internal.h | 60 ++++++++++++++++++++++++++++++++++
>>>  3 files changed, 138 insertions(+)
>>>
>>> ...
>>> diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
>>> index 4c5a535ca6..615f514a85 100644
>>> --- a/libavcodec/cbs_internal.h
>>> +++ b/libavcodec/cbs_internal.h
>>> @@ -25,11 +25,71 @@
>>>  #include "put_bits.h"
>>>  
>>>  
>>> +enum {
>>> +    // Unit content is a simple structure.
>>> +    CBS_CONTENT_TYPE_POD,
>>> +    // Unit content contains some references to other structures, but all
>>> +    // managed via buffer reference counting.  The descriptor defines the
>>> +    // structure offsets of every buffer reference.
>>> +    CBS_CONTENT_TYPE_INTERNAL_REFS,
>>> +    // Unit content is something more complex.  The descriptor defines
>>> +    // special functions to manage the content.
>>> +    CBS_CONTENT_TYPE_COMPLEX,
>>> +};
>>> +
>>> +enum {
>>> +      // Maximum number of unit types described by the same unit type
>>> +      // descriptor.
>>> +      CBS_MAX_UNIT_TYPES = 16,
>>
>> This is quite big and I wonder whether it would not be better to
>> simply split the HEVC-slices into two range descriptors in order to
>> reduce this to three.
> 
> As-written, the range case is only covering the one actual range case (MPEG-2 slices).  I think it's preferable to leave the HEVC slice headers as-is, because having the full list there explicitly is much clearer.
> 
> As an alternative, would you prefer the array here to be a pointer + compound literal array?  It would avoid this constant and save few bytes of space on average, at the cost of the definitions being slightly more complex.
> 
No.

>>> +      // Maximum number of reference buffer offsets in any one unit.
>>> +      CBS_MAX_REF_OFFSETS = 2,
>>> +      // Special value used in a unit type descriptor to indicate that it
>>> +      // applies to a large range of types rather than a set of discrete
>>> +      // values.
>>> +      CBS_UNIT_TYPE_RANGE = -1,
>>> +};
>>> +
>>> +typedef struct CodedBitstreamUnitTypeDescriptor {
>>> +    // Number of entries in the unit_types array, or the special value
>>> +    // CBS_UNIT_TYPE_RANGE to indicate that the range fields should be
>>> +    // used instead.
>>> +    int nb_unit_types;
>>> +
>>> +    // Array of unit types that this entry describes.
>>> +    const CodedBitstreamUnitType unit_types[CBS_MAX_UNIT_TYPES];
>>> +
>>> +    // Start and end of unit type range, used if nb_unit_types == 0.
>>
>> nb_unit_types == 0 is actually used for the sentinel in the
>> CodedBitstreamUnitTypeDescriptor-array. nb_unit_types ==
>> CBS_UNIT_TYPE_RANGE indicates that this descriptor uses ranges.
> 
> Fixed.
> 
>>> +    const CodedBitstreamUnitType unit_type_range_start;
>>> +    const CodedBitstreamUnitType unit_type_range_end;
>>
>> The ranges could be free (size-wise) if you used a union with unit_types.
> 
> Anonymous unions are still not allowed in FFmpeg, unfortunately (they are C11, though many compilers supported them before that).
> 
What about a non-anonymous union? It would only be used in
cbs_find_unit_type_desc().
>>> +
>>> +    // The type of content described, from CBS_CONTENT_TYPE_*.
>>> +    int    content_type;
>>
>> Maybe make a proper type out of the CBS_CONTENT_TYPE_*-enum and use it
>> here?
> 
> That's a good idea; done.
> 
>>> +    // The size of the structure which should be allocated to contain
>>> +    // the decomposed content of this type of unit.
>>> +    size_t content_size;
>>> +
>>> +    // Number of entries in the ref_offsets array.  Only used if the
>>> +    // content_type is CBS_CONTENT_TYPE_INTERNAL_REFS.
>>> +    int nb_ref_offsets;
>>> +    // The structure must contain two adjacent elements:
>>> +    //   type        *field;
>>> +    //   AVBufferRef *field_ref;
>>> +    // where field points to something in the buffer referred to by
>>> +    // field_ref.  This offset is then set to offsetof(struct, field).
>>> +    size_t ref_offsets[CBS_MAX_REF_OFFSETS];
>>> +
>>> +    void (*content_free)(void *opaque, uint8_t *data);
>>
>> Is there a usecase for a dedicated free-function different for a unit
>> of type CBS_CONTENT_TYPE_INTERNAL_REFS? If not, then one could use a
>> union for this and the ref_offset stuff.
> 
> Yes, but the same anonymous union problem.
> 
>>> +} CodedBitstreamUnitTypeDescriptor;
>>
>> I wonder whether you should add const to the typedef in order to omit
>> it everywhere else. After all, no CodedBitstreamUnitTypeDescriptor
>> will ever be assembled during runtime.
> 
> It definitely makes sense to add it to reduce errors.  Not so sure about the removing it from everywhere else - the fact that it looks wrong at the point of use probably causes more confusion.
> 
> So, I've done the first part but not the second (helpfully, redundant type qualifiers have no effect).

MSVC emits a warning (or just a note or so) for this.

- Andreas
Mark Thompson Feb. 24, 2020, 10:31 p.m. UTC | #4
On 24/02/2020 22:10, Andreas Rheinhardt wrote:
> Mark Thompson:
>> On 24/02/2020 17:19, Andreas Rheinhardt wrote:
>>> Mark Thompson:
>>>> Unit types are split into three categories, depending on how their
>>>> content is managed:
>>>> * POD structure - these require no special treatment.
>>>> * Structure containing references to refcounted buffers - these can use
>>>>   a common free function when the offsets of all the internal references
>>>>   are known.
>>>> * More complex structures - these still require ad-hoc treatment.
>>>>
>>>> For each codec we can then maintain a table of descriptors for each set of
>>>> equivalent unit types, defining the mechanism needed to allocate/free that
>>>> unit content.  This is not required to be used immediately - a new alloc
>>>> function supports this, but does not replace the old one which works without
>>>> referring to these tables.
>>>> ---
>>>>  libavcodec/cbs.c          | 69 +++++++++++++++++++++++++++++++++++++++
>>>>  libavcodec/cbs.h          |  9 +++++
>>>>  libavcodec/cbs_internal.h | 60 ++++++++++++++++++++++++++++++++++
>>>>  3 files changed, 138 insertions(+)
>>>>
>>>> ...
>>>> diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
>>>> index 4c5a535ca6..615f514a85 100644
>>>> --- a/libavcodec/cbs_internal.h
>>>> +++ b/libavcodec/cbs_internal.h
>>>> @@ -25,11 +25,71 @@
>>>>  #include "put_bits.h"
>>>>  
>>>>  
>>>> +enum {
>>>> +    // Unit content is a simple structure.
>>>> +    CBS_CONTENT_TYPE_POD,
>>>> +    // Unit content contains some references to other structures, but all
>>>> +    // managed via buffer reference counting.  The descriptor defines the
>>>> +    // structure offsets of every buffer reference.
>>>> +    CBS_CONTENT_TYPE_INTERNAL_REFS,
>>>> +    // Unit content is something more complex.  The descriptor defines
>>>> +    // special functions to manage the content.
>>>> +    CBS_CONTENT_TYPE_COMPLEX,
>>>> +};
>>>> +
>>>> +enum {
>>>> +      // Maximum number of unit types described by the same unit type
>>>> +      // descriptor.
>>>> +      CBS_MAX_UNIT_TYPES = 16,
>>>
>>> This is quite big and I wonder whether it would not be better to
>>> simply split the HEVC-slices into two range descriptors in order to
>>> reduce this to three.
>>
>> As-written, the range case is only covering the one actual range case (MPEG-2 slices).  I think it's preferable to leave the HEVC slice headers as-is, because having the full list there explicitly is much clearer.
>>
>> As an alternative, would you prefer the array here to be a pointer + compound literal array?  It would avoid this constant and save few bytes of space on average, at the cost of the definitions being slightly more complex.
>>
> No.
> 
>>>> +      // Maximum number of reference buffer offsets in any one unit.
>>>> +      CBS_MAX_REF_OFFSETS = 2,
>>>> +      // Special value used in a unit type descriptor to indicate that it
>>>> +      // applies to a large range of types rather than a set of discrete
>>>> +      // values.
>>>> +      CBS_UNIT_TYPE_RANGE = -1,
>>>> +};
>>>> +
>>>> +typedef struct CodedBitstreamUnitTypeDescriptor {
>>>> +    // Number of entries in the unit_types array, or the special value
>>>> +    // CBS_UNIT_TYPE_RANGE to indicate that the range fields should be
>>>> +    // used instead.
>>>> +    int nb_unit_types;
>>>> +
>>>> +    // Array of unit types that this entry describes.
>>>> +    const CodedBitstreamUnitType unit_types[CBS_MAX_UNIT_TYPES];
>>>> +
>>>> +    // Start and end of unit type range, used if nb_unit_types == 0.
>>>
>>> nb_unit_types == 0 is actually used for the sentinel in the
>>> CodedBitstreamUnitTypeDescriptor-array. nb_unit_types ==
>>> CBS_UNIT_TYPE_RANGE indicates that this descriptor uses ranges.
>>
>> Fixed.
>>
>>>> +    const CodedBitstreamUnitType unit_type_range_start;
>>>> +    const CodedBitstreamUnitType unit_type_range_end;
>>>
>>> The ranges could be free (size-wise) if you used a union with unit_types.
>>
>> Anonymous unions are still not allowed in FFmpeg, unfortunately (they are C11, though many compilers supported them before that).
>>
> What about a non-anonymous union? It would only be used in
> cbs_find_unit_type_desc().

Mildly against because it would be ugly to bunch together the unrelated fields, but I could be pushed into it.

>>>> +
>>>> +    // The type of content described, from CBS_CONTENT_TYPE_*.
>>>> +    int    content_type;
>>>
>>> Maybe make a proper type out of the CBS_CONTENT_TYPE_*-enum and use it
>>> here?
>>
>> That's a good idea; done.
>>
>>>> +    // The size of the structure which should be allocated to contain
>>>> +    // the decomposed content of this type of unit.
>>>> +    size_t content_size;
>>>> +
>>>> +    // Number of entries in the ref_offsets array.  Only used if the
>>>> +    // content_type is CBS_CONTENT_TYPE_INTERNAL_REFS.
>>>> +    int nb_ref_offsets;
>>>> +    // The structure must contain two adjacent elements:
>>>> +    //   type        *field;
>>>> +    //   AVBufferRef *field_ref;
>>>> +    // where field points to something in the buffer referred to by
>>>> +    // field_ref.  This offset is then set to offsetof(struct, field).
>>>> +    size_t ref_offsets[CBS_MAX_REF_OFFSETS];
>>>> +
>>>> +    void (*content_free)(void *opaque, uint8_t *data);
>>>
>>> Is there a usecase for a dedicated free-function different for a unit
>>> of type CBS_CONTENT_TYPE_INTERNAL_REFS? If not, then one could use a
>>> union for this and the ref_offset stuff.
>>
>> Yes, but the same anonymous union problem.
>>
>>>> +} CodedBitstreamUnitTypeDescriptor;
>>>
>>> I wonder whether you should add const to the typedef in order to omit
>>> it everywhere else. After all, no CodedBitstreamUnitTypeDescriptor
>>> will ever be assembled during runtime.
>>
>> It definitely makes sense to add it to reduce errors.  Not so sure about the removing it from everywhere else - the fact that it looks wrong at the point of use probably causes more confusion.
>>
>> So, I've done the first part but not the second (helpfully, redundant type qualifiers have no effect).
> 
> MSVC emits a warning (or just a note or so) for this.
Urgh.  Is that definitely intended or is it a bug in the compiler?  The C standard is very clear that this is fine (C11 6.7.3).

- Mark
Andreas Rheinhardt Feb. 24, 2020, 10:42 p.m. UTC | #5
Mark Thompson:
> On 24/02/2020 22:10, Andreas Rheinhardt wrote:
>> Mark Thompson:
>>> On 24/02/2020 17:19, Andreas Rheinhardt wrote:
>>>> Mark Thompson:
>>>>> Unit types are split into three categories, depending on how their
>>>>> content is managed:
>>>>> * POD structure - these require no special treatment.
>>>>> * Structure containing references to refcounted buffers - these can use
>>>>>   a common free function when the offsets of all the internal references
>>>>>   are known.
>>>>> * More complex structures - these still require ad-hoc treatment.
>>>>>
>>>>> For each codec we can then maintain a table of descriptors for each set of
>>>>> equivalent unit types, defining the mechanism needed to allocate/free that
>>>>> unit content.  This is not required to be used immediately - a new alloc
>>>>> function supports this, but does not replace the old one which works without
>>>>> referring to these tables.
>>>>> ---
>>>>>  libavcodec/cbs.c          | 69 +++++++++++++++++++++++++++++++++++++++
>>>>>  libavcodec/cbs.h          |  9 +++++
>>>>>  libavcodec/cbs_internal.h | 60 ++++++++++++++++++++++++++++++++++
>>>>>  3 files changed, 138 insertions(+)
>>>>>
>>>>> ...
>>>>> diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
>>>>> index 4c5a535ca6..615f514a85 100644
>>>>> --- a/libavcodec/cbs_internal.h
>>>>> +++ b/libavcodec/cbs_internal.h
>>>>> @@ -25,11 +25,71 @@
>>>>>  #include "put_bits.h"
>>>>>  
>>>>>  
>>>>> +enum {
>>>>> +    // Unit content is a simple structure.
>>>>> +    CBS_CONTENT_TYPE_POD,
>>>>> +    // Unit content contains some references to other structures, but all
>>>>> +    // managed via buffer reference counting.  The descriptor defines the
>>>>> +    // structure offsets of every buffer reference.
>>>>> +    CBS_CONTENT_TYPE_INTERNAL_REFS,
>>>>> +    // Unit content is something more complex.  The descriptor defines
>>>>> +    // special functions to manage the content.
>>>>> +    CBS_CONTENT_TYPE_COMPLEX,
>>>>> +};
>>>>> +
>>>>> +enum {
>>>>> +      // Maximum number of unit types described by the same unit type
>>>>> +      // descriptor.
>>>>> +      CBS_MAX_UNIT_TYPES = 16,
>>>>
>>>> This is quite big and I wonder whether it would not be better to
>>>> simply split the HEVC-slices into two range descriptors in order to
>>>> reduce this to three.
>>>
>>> As-written, the range case is only covering the one actual range case (MPEG-2 slices).  I think it's preferable to leave the HEVC slice headers as-is, because having the full list there explicitly is much clearer.
>>>
>>> As an alternative, would you prefer the array here to be a pointer + compound literal array?  It would avoid this constant and save few bytes of space on average, at the cost of the definitions being slightly more complex.
>>>
>> No.
>>
>>>>> +      // Maximum number of reference buffer offsets in any one unit.
>>>>> +      CBS_MAX_REF_OFFSETS = 2,
>>>>> +      // Special value used in a unit type descriptor to indicate that it
>>>>> +      // applies to a large range of types rather than a set of discrete
>>>>> +      // values.
>>>>> +      CBS_UNIT_TYPE_RANGE = -1,
>>>>> +};
>>>>> +
>>>>> +typedef struct CodedBitstreamUnitTypeDescriptor {
>>>>> +    // Number of entries in the unit_types array, or the special value
>>>>> +    // CBS_UNIT_TYPE_RANGE to indicate that the range fields should be
>>>>> +    // used instead.
>>>>> +    int nb_unit_types;
>>>>> +
>>>>> +    // Array of unit types that this entry describes.
>>>>> +    const CodedBitstreamUnitType unit_types[CBS_MAX_UNIT_TYPES];
>>>>> +
>>>>> +    // Start and end of unit type range, used if nb_unit_types == 0.
>>>>
>>>> nb_unit_types == 0 is actually used for the sentinel in the
>>>> CodedBitstreamUnitTypeDescriptor-array. nb_unit_types ==
>>>> CBS_UNIT_TYPE_RANGE indicates that this descriptor uses ranges.
>>>
>>> Fixed.
>>>
>>>>> +    const CodedBitstreamUnitType unit_type_range_start;
>>>>> +    const CodedBitstreamUnitType unit_type_range_end;
>>>>
>>>> The ranges could be free (size-wise) if you used a union with unit_types.
>>>
>>> Anonymous unions are still not allowed in FFmpeg, unfortunately (they are C11, though many compilers supported them before that).
>>>
>> What about a non-anonymous union? It would only be used in
>> cbs_find_unit_type_desc().
> 
> Mildly against because it would be ugly to bunch together the unrelated fields, but I could be pushed into it.
> 
>>>>> +
>>>>> +    // The type of content described, from CBS_CONTENT_TYPE_*.
>>>>> +    int    content_type;
>>>>
>>>> Maybe make a proper type out of the CBS_CONTENT_TYPE_*-enum and use it
>>>> here?
>>>
>>> That's a good idea; done.
>>>
>>>>> +    // The size of the structure which should be allocated to contain
>>>>> +    // the decomposed content of this type of unit.
>>>>> +    size_t content_size;
>>>>> +
>>>>> +    // Number of entries in the ref_offsets array.  Only used if the
>>>>> +    // content_type is CBS_CONTENT_TYPE_INTERNAL_REFS.
>>>>> +    int nb_ref_offsets;
>>>>> +    // The structure must contain two adjacent elements:
>>>>> +    //   type        *field;
>>>>> +    //   AVBufferRef *field_ref;
>>>>> +    // where field points to something in the buffer referred to by
>>>>> +    // field_ref.  This offset is then set to offsetof(struct, field).
>>>>> +    size_t ref_offsets[CBS_MAX_REF_OFFSETS];
>>>>> +
>>>>> +    void (*content_free)(void *opaque, uint8_t *data);
>>>>
>>>> Is there a usecase for a dedicated free-function different for a unit
>>>> of type CBS_CONTENT_TYPE_INTERNAL_REFS? If not, then one could use a
>>>> union for this and the ref_offset stuff.
>>>
>>> Yes, but the same anonymous union problem.
>>>
>>>>> +} CodedBitstreamUnitTypeDescriptor;
>>>>
>>>> I wonder whether you should add const to the typedef in order to omit
>>>> it everywhere else. After all, no CodedBitstreamUnitTypeDescriptor
>>>> will ever be assembled during runtime.
>>>
>>> It definitely makes sense to add it to reduce errors.  Not so sure about the removing it from everywhere else - the fact that it looks wrong at the point of use probably causes more confusion.
>>>
>>> So, I've done the first part but not the second (helpfully, redundant type qualifiers have no effect).
>>
>> MSVC emits a warning (or just a note or so) for this.
> Urgh.  Is that definitely intended or is it a bug in the compiler?  The C standard is very clear that this is fine (C11 6.7.3).
> 
This is also the way it is in C99, but given that [1] says that it
leads to an error with MSVC in ANSI-C mode (which means C90), I looked
at C90 and found:
"The same type qualifier shall not appear more than once in the same
specifier list or qualifier list, either directly or via one or more
typedefs."

- Andreas

[1]:
https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-1-c4114?view=vs-2019
Mark Thompson Feb. 24, 2020, 10:52 p.m. UTC | #6
On 24/02/2020 22:42, Andreas Rheinhardt wrote:
> Mark Thompson:
>> On 24/02/2020 22:10, Andreas Rheinhardt wrote:
>>> Mark Thompson:
>>>> On 24/02/2020 17:19, Andreas Rheinhardt wrote:
>>>>> Mark Thompson:
>>>>>> Unit types are split into three categories, depending on how their
>>>>>> content is managed:
>>>>>> * POD structure - these require no special treatment.
>>>>>> * Structure containing references to refcounted buffers - these can use
>>>>>>   a common free function when the offsets of all the internal references
>>>>>>   are known.
>>>>>> * More complex structures - these still require ad-hoc treatment.
>>>>>>
>>>>>> For each codec we can then maintain a table of descriptors for each set of
>>>>>> equivalent unit types, defining the mechanism needed to allocate/free that
>>>>>> unit content.  This is not required to be used immediately - a new alloc
>>>>>> function supports this, but does not replace the old one which works without
>>>>>> referring to these tables.
>>>>>> ---
>>>>>>  libavcodec/cbs.c          | 69 +++++++++++++++++++++++++++++++++++++++
>>>>>>  libavcodec/cbs.h          |  9 +++++
>>>>>>  libavcodec/cbs_internal.h | 60 ++++++++++++++++++++++++++++++++++
>>>>>>  3 files changed, 138 insertions(+)
>>>>>>
>>>>>> ...
>>>>>> +typedef struct CodedBitstreamUnitTypeDescriptor {
>>>>>> ...
>>>>>> +} CodedBitstreamUnitTypeDescriptor;
>>>>>
>>>>> I wonder whether you should add const to the typedef in order to omit
>>>>> it everywhere else. After all, no CodedBitstreamUnitTypeDescriptor
>>>>> will ever be assembled during runtime.
>>>>
>>>> It definitely makes sense to add it to reduce errors.  Not so sure about the removing it from everywhere else - the fact that it looks wrong at the point of use probably causes more confusion.
>>>>
>>>> So, I've done the first part but not the second (helpfully, redundant type qualifiers have no effect).
>>>
>>> MSVC emits a warning (or just a note or so) for this.
>> Urgh.  Is that definitely intended or is it a bug in the compiler?  The C standard is very clear that this is fine (C11 6.7.3).
>>
> This is also the way it is in C99, but given that [1] says that it
> leads to an error with MSVC in ANSI-C mode (which means C90), I looked
> at C90 and found:
> "The same type qualifier shall not appear more than once in the same
> specifier list or qualifier list, either directly or via one or more
> typedefs."

Ok, that's fatal to this plan.  I think we're better with the consts on the individual cases (less confusing, if slightly less efficient), so I've reverted back to the original.

Thanks,

- Mark
diff mbox series

Patch

diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
index 0bd5e1ac5d..6cc559e545 100644
--- a/libavcodec/cbs.c
+++ b/libavcodec/cbs.c
@@ -812,3 +812,72 @@  void ff_cbs_delete_unit(CodedBitstreamContext *ctx,
                 frag->units + position + 1,
                 (frag->nb_units - position) * sizeof(*frag->units));
 }
+
+static void cbs_default_free_unit_content(void *opaque, uint8_t *data)
+{
+    const CodedBitstreamUnitTypeDescriptor *desc = opaque;
+    if (desc->content_type == CBS_CONTENT_TYPE_INTERNAL_REFS) {
+        int i;
+        for (i = 0; i < desc->nb_ref_offsets; i++) {
+            void **ptr = (void**)(data + desc->ref_offsets[i]);
+            av_buffer_unref((AVBufferRef**)(ptr + 1));
+        }
+    }
+    av_free(data);
+}
+
+static const CodedBitstreamUnitTypeDescriptor
+    *cbs_find_unit_type_desc(CodedBitstreamContext *ctx,
+                             CodedBitstreamUnit *unit)
+{
+    const CodedBitstreamUnitTypeDescriptor *desc;
+    int i, j;
+
+    if (!ctx->codec->unit_types)
+        return NULL;
+
+    for (i = 0;; i++) {
+        desc = &ctx->codec->unit_types[i];
+        if (desc->nb_unit_types == 0)
+            break;
+        if (desc->nb_unit_types == CBS_UNIT_TYPE_RANGE) {
+            if (unit->type >= desc->unit_type_range_start &&
+                unit->type <= desc->unit_type_range_end)
+                return desc;
+        } else {
+            for (j = 0; j < desc->nb_unit_types; j++) {
+                if (desc->unit_types[j] == unit->type)
+                    return desc;
+            }
+        }
+    }
+    return NULL;
+}
+
+int ff_cbs_alloc_unit_content2(CodedBitstreamContext *ctx,
+                               CodedBitstreamUnit *unit)
+{
+    const CodedBitstreamUnitTypeDescriptor *desc;
+
+    av_assert0(!unit->content && !unit->content_ref);
+
+    desc = cbs_find_unit_type_desc(ctx, unit);
+    if (!desc)
+        return AVERROR(ENOSYS);
+
+    unit->content = av_mallocz(desc->content_size);
+    if (!unit->content)
+        return AVERROR(ENOMEM);
+
+    unit->content_ref =
+        av_buffer_create(unit->content, desc->content_size,
+                         desc->content_free ? desc->content_free
+                                            : cbs_default_free_unit_content,
+                         (void*)desc, 0);
+    if (!unit->content_ref) {
+        av_freep(&unit->content);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h
index cb3081e2c6..2a5959a2b0 100644
--- a/libavcodec/cbs.h
+++ b/libavcodec/cbs.h
@@ -352,6 +352,15 @@  int ff_cbs_alloc_unit_content(CodedBitstreamContext *ctx,
                               size_t size,
                               void (*free)(void *opaque, uint8_t *content));
 
+/**
+ * Allocate a new internal content buffer matching the type of the unit.
+ *
+ * The content will be zeroed.
+ */
+int ff_cbs_alloc_unit_content2(CodedBitstreamContext *ctx,
+                               CodedBitstreamUnit *unit);
+
+
 /**
  * Allocate a new internal data buffer of the given size in the unit.
  *
diff --git a/libavcodec/cbs_internal.h b/libavcodec/cbs_internal.h
index 4c5a535ca6..615f514a85 100644
--- a/libavcodec/cbs_internal.h
+++ b/libavcodec/cbs_internal.h
@@ -25,11 +25,71 @@ 
 #include "put_bits.h"
 
 
+enum {
+    // Unit content is a simple structure.
+    CBS_CONTENT_TYPE_POD,
+    // Unit content contains some references to other structures, but all
+    // managed via buffer reference counting.  The descriptor defines the
+    // structure offsets of every buffer reference.
+    CBS_CONTENT_TYPE_INTERNAL_REFS,
+    // Unit content is something more complex.  The descriptor defines
+    // special functions to manage the content.
+    CBS_CONTENT_TYPE_COMPLEX,
+};
+
+enum {
+      // Maximum number of unit types described by the same unit type
+      // descriptor.
+      CBS_MAX_UNIT_TYPES = 16,
+      // Maximum number of reference buffer offsets in any one unit.
+      CBS_MAX_REF_OFFSETS = 2,
+      // Special value used in a unit type descriptor to indicate that it
+      // applies to a large range of types rather than a set of discrete
+      // values.
+      CBS_UNIT_TYPE_RANGE = -1,
+};
+
+typedef struct CodedBitstreamUnitTypeDescriptor {
+    // Number of entries in the unit_types array, or the special value
+    // CBS_UNIT_TYPE_RANGE to indicate that the range fields should be
+    // used instead.
+    int nb_unit_types;
+
+    // Array of unit types that this entry describes.
+    const CodedBitstreamUnitType unit_types[CBS_MAX_UNIT_TYPES];
+
+    // Start and end of unit type range, used if nb_unit_types == 0.
+    const CodedBitstreamUnitType unit_type_range_start;
+    const CodedBitstreamUnitType unit_type_range_end;
+
+    // The type of content described, from CBS_CONTENT_TYPE_*.
+    int    content_type;
+    // The size of the structure which should be allocated to contain
+    // the decomposed content of this type of unit.
+    size_t content_size;
+
+    // Number of entries in the ref_offsets array.  Only used if the
+    // content_type is CBS_CONTENT_TYPE_INTERNAL_REFS.
+    int nb_ref_offsets;
+    // The structure must contain two adjacent elements:
+    //   type        *field;
+    //   AVBufferRef *field_ref;
+    // where field points to something in the buffer referred to by
+    // field_ref.  This offset is then set to offsetof(struct, field).
+    size_t ref_offsets[CBS_MAX_REF_OFFSETS];
+
+    void (*content_free)(void *opaque, uint8_t *data);
+} CodedBitstreamUnitTypeDescriptor;
+
 typedef struct CodedBitstreamType {
     enum AVCodecID codec_id;
 
     size_t priv_data_size;
 
+    // List of unit type descriptors for this codec.
+    // Terminated by a descriptor with nb_unit_types equal to zero.
+    const CodedBitstreamUnitTypeDescriptor *unit_types;
+
     // Split frag->data into coded bitstream units, creating the
     // frag->units array.  Fill data but not content on each unit.
     // The header argument should be set if the fragment came from