diff mbox

[FFmpeg-devel] dash: add descriptor which is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015.

Message ID 1563160264-131104-1-git-send-email-leozhang@qiyi.com
State Superseded
Headers show

Commit Message

leozhang July 15, 2019, 3:11 a.m. UTC
change history:
1. remove unnecessary cast.
2. add some braces.

Please comment, Thanks

Signed-off-by: leozhang <leozhang@qiyi.com>
---
 doc/muxers.texi       |  3 +++
 libavformat/dashenc.c | 35 ++++++++++++++++++++++++++++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)

Comments

leozhang July 16, 2019, 9:16 a.m. UTC | #1
Let me add that a descriptor provides extensible syntax and semantics
for describing Adaptation Set properties.
In my scenario, I implemented a VR tiled video system using descriptors.

leozhang <leozhang@qiyi.com> 于2019年7月15日周一 上午11:11写道:
>
> change history:
> 1. remove unnecessary cast.
> 2. add some braces.
>
> Please comment, Thanks
>
> Signed-off-by: leozhang <leozhang@qiyi.com>
> ---
>  doc/muxers.texi       |  3 +++
>  libavformat/dashenc.c | 35 ++++++++++++++++++++++++++++++++---
>  2 files changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/doc/muxers.texi b/doc/muxers.texi
> index b109297..ac06ad2 100644
> --- a/doc/muxers.texi
> +++ b/doc/muxers.texi
> @@ -275,6 +275,9 @@ of the adaptation sets and a,b,c,d and e are the indices of the mapped streams.
>  To map all video (or audio) streams to an AdaptationSet, "v" (or "a") can be used as stream identifier instead of IDs.
>
>  When no assignment is defined, this defaults to an AdaptationSet for each stream.
> +
> +Optional syntax is "id=x,descriptor=descriptor_str,streams=a,b,c id=y,streams=d,e" and so on, descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015.
> +And descriptor_str must be a properly formatted XML element, which is encoded by base64.
>  @item timeout @var{timeout}
>  Set timeout for socket I/O operations. Applicable only for HTTP output.
>  @item index_correction @var{index_correction}
> diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
> index b25afb4..a48031c 100644
> --- a/libavformat/dashenc.c
> +++ b/libavformat/dashenc.c
> @@ -34,6 +34,7 @@
>  #include "libavutil/rational.h"
>  #include "libavutil/time.h"
>  #include "libavutil/time_internal.h"
> +#include "libavutil/base64.h"
>
>  #include "avc.h"
>  #include "avformat.h"
> @@ -68,6 +69,7 @@ typedef struct Segment {
>
>  typedef struct AdaptationSet {
>      char id[10];
> +    char descriptor[1024];
>      enum AVMediaType media_type;
>      AVDictionary *metadata;
>      AVRational min_frame_rate, max_frame_rate;
> @@ -748,7 +750,8 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
>      role = av_dict_get(as->metadata, "role", NULL, 0);
>      if (role)
>          avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);
> -
> +    if (strlen(as->descriptor))
> +        avio_printf(out, "\t\t\t%s\n", as->descriptor);
>      for (i = 0; i < s->nb_streams; i++) {
>          OutputStream *os = &c->streams[i];
>          char bandwidth_str[64] = {'\0'};
> @@ -820,7 +823,7 @@ static int parse_adaptation_sets(AVFormatContext *s)
>  {
>      DASHContext *c = s->priv_data;
>      const char *p = c->adaptation_sets;
> -    enum { new_set, parse_id, parsing_streams } state;
> +    enum { new_set, parse_id, parsing_streams, parse_descriptor } state;
>      AdaptationSet *as;
>      int i, n, ret;
>
> @@ -837,6 +840,9 @@ static int parse_adaptation_sets(AVFormatContext *s)
>      }
>
>      // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
> +    // option id=0,descriptor=descriptor_str,streams=0,1,2 and so on
> +    // descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015
> +    // descriptor_str must be a properly formatted XML element, encoded by base64.
>      state = new_set;
>      while (*p) {
>          if (*p == ' ') {
> @@ -854,7 +860,30 @@ static int parse_adaptation_sets(AVFormatContext *s)
>              if (*p)
>                  p++;
>              state = parse_id;
> -        } else if (state == parse_id && av_strstart(p, "streams=", &p)) {
> +        } else if (state == parse_id && av_strstart(p, "descriptor=", &p)) {
> +            char *encode_str, *decode_str;
> +            int decode_size, ret;
> +
> +            n = strcspn(p, ",");
> +            encode_str = av_strndup(p, n);
> +            decode_size = AV_BASE64_DECODE_SIZE(n);
> +            decode_str = av_mallocz(decode_size);
> +            if (decode_str) {
> +                ret = av_base64_decode(decode_str, encode_str, decode_size);
> +                if (ret >= 0)
> +                    snprintf(as->descriptor, sizeof(as->descriptor), "%.*s", decode_size, decode_str);
> +                else
> +                    av_log(s, AV_LOG_WARNING, "descriptor string is invalid base64 encode\n");
> +            } else {
> +                av_log(s, AV_LOG_WARNING, "av_mallocz failed, will not parse descriptor\n");
> +            }
> +            p += n;
> +            if (*p)
> +                p++;
> +            state = parse_descriptor;
> +            av_freep(&encode_str);
> +            av_freep(&decode_str);
> +        } else if ((state == parse_id || state == parse_descriptor) && av_strstart(p, "streams=", &p)) { //descriptor is optional
>              state = parsing_streams;
>          } else if (state == parsing_streams) {
>              AdaptationSet *as = &c->as[c->nb_as - 1];
> --
> 1.8.3.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Jeyapal, Karthick July 17, 2019, 2:46 a.m. UTC | #2
On 7/15/19 8:41 AM, leozhang wrote:
> change history:

> 1. remove unnecessary cast.

> 2. add some braces.

>

> Please comment, Thanks

Thanks for sending the patch. Please find some of my comments inlined below.
>

> Signed-off-by: leozhang <leozhang@qiyi.com>

> ---

>  doc/muxers.texi       |  3 +++

>  libavformat/dashenc.c | 35 ++++++++++++++++++++++++++++++++---

>  2 files changed, 35 insertions(+), 3 deletions(-)

>

> diff --git a/doc/muxers.texi b/doc/muxers.texi

> index b109297..ac06ad2 100644

> --- a/doc/muxers.texi

> +++ b/doc/muxers.texi

> @@ -275,6 +275,9 @@ of the adaptation sets and a,b,c,d and e are the indices of the mapped streams.

>  To map all video (or audio) streams to an AdaptationSet, "v" (or "a") can be used as stream identifier instead of IDs.

>  

>  When no assignment is defined, this defaults to an AdaptationSet for each stream.

> +

> +Optional syntax is "id=x,descriptor=descriptor_str,streams=a,b,c id=y,streams=d,e" and so on, descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015.

> +And descriptor_str must be a properly formatted XML element, which is encoded by base64.

Two comments:
1. Please provide an example here, so that it is easier for people to understand.
2. Why do we need this to be base64 encoded? What is the use-case where a normal string doesn't work?
>  @item timeout @var{timeout}

>  Set timeout for socket I/O operations. Applicable only for HTTP output.

>  @item index_correction @var{index_correction}

> diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c

> index b25afb4..a48031c 100644

> --- a/libavformat/dashenc.c

> +++ b/libavformat/dashenc.c

> @@ -34,6 +34,7 @@

>  #include "libavutil/rational.h"

>  #include "libavutil/time.h"

>  #include "libavutil/time_internal.h"

> +#include "libavutil/base64.h"

>  

>  #include "avc.h"

>  #include "avformat.h"

> @@ -68,6 +69,7 @@ typedef struct Segment {

>  

>  typedef struct AdaptationSet {

>      char id[10];

> +    char descriptor[1024];

Please change this to char * and allocate it dynamically. I understand there is some legacy code in dashenc using this 1024 length,
but at least new code should use dynamic allocation.
>      enum AVMediaType media_type;

>      AVDictionary *metadata;

>      AVRational min_frame_rate, max_frame_rate;

> @@ -748,7 +750,8 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind

>      role = av_dict_get(as->metadata, "role", NULL, 0);

>      if (role)

>          avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);

> -

> +    if (strlen(as->descriptor))

> +        avio_printf(out, "\t\t\t%s\n", as->descriptor);

>      for (i = 0; i < s->nb_streams; i++) {

>          OutputStream *os = &c->streams[i];

>          char bandwidth_str[64] = {'\0'};

> @@ -820,7 +823,7 @@ static int parse_adaptation_sets(AVFormatContext *s)

>  {

>      DASHContext *c = s->priv_data;

>      const char *p = c->adaptation_sets;

> -    enum { new_set, parse_id, parsing_streams } state;

> +    enum { new_set, parse_id, parsing_streams, parse_descriptor } state;

>      AdaptationSet *as;

>      int i, n, ret;

>  

> @@ -837,6 +840,9 @@ static int parse_adaptation_sets(AVFormatContext *s)

>      }

>  

>      // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on

> +    // option id=0,descriptor=descriptor_str,streams=0,1,2 and so on

> +    // descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015

> +    // descriptor_str must be a properly formatted XML element, encoded by base64.

>      state = new_set;

>      while (*p) {

>          if (*p == ' ') {

> @@ -854,7 +860,30 @@ static int parse_adaptation_sets(AVFormatContext *s)

>              if (*p)

>                  p++;

>              state = parse_id;

> -        } else if (state == parse_id && av_strstart(p, "streams=", &p)) {

> +        } else if (state == parse_id && av_strstart(p, "descriptor=", &p)) {

> +            char *encode_str, *decode_str;

> +            int decode_size, ret;

> +

> +            n = strcspn(p, ",");

> +            encode_str = av_strndup(p, n);

> +            decode_size = AV_BASE64_DECODE_SIZE(n);

> +            decode_str = av_mallocz(decode_size);

> +            if (decode_str) {

> +                ret = av_base64_decode(decode_str, encode_str, decode_size);

> +                if (ret >= 0)

> +                    snprintf(as->descriptor, sizeof(as->descriptor), "%.*s", decode_size, decode_str);

> +                else

> +                    av_log(s, AV_LOG_WARNING, "descriptor string is invalid base64 encode\n");

> +            } else {

> +                av_log(s, AV_LOG_WARNING, "av_mallocz failed, will not parse descriptor\n");

> +            }

> +            p += n;

> +            if (*p)

> +                p++;

> +            state = parse_descriptor;

> +            av_freep(&encode_str);

> +            av_freep(&decode_str);

> +        } else if ((state == parse_id || state == parse_descriptor) && av_strstart(p, "streams=", &p)) { //descriptor is optional 

>              state = parsing_streams;

>          } else if (state == parsing_streams) {

>              AdaptationSet *as = &c->as[c->nb_as - 1];

Regards,
Karthick
leozhang July 17, 2019, 4:51 a.m. UTC | #3
Jeyapal, Karthick <kjeyapal@akamai.com> 于2019年7月17日周三 上午10:46写道:
>
>
> On 7/15/19 8:41 AM, leozhang wrote:
> > change history:
> > 1. remove unnecessary cast.
> > 2. add some braces.
> >
> > Please comment, Thanks
> Thanks for sending the patch. Please find some of my comments inlined below.
Thanks for your comments. I made some changes below. Please review it, thanks.
> >
> > Signed-off-by: leozhang <leozhang@qiyi.com>
> > ---
> >  doc/muxers.texi       |  3 +++
> >  libavformat/dashenc.c | 35 ++++++++++++++++++++++++++++++++---
> >  2 files changed, 35 insertions(+), 3 deletions(-)
> >
> > diff --git a/doc/muxers.texi b/doc/muxers.texi
> > index b109297..ac06ad2 100644
> > --- a/doc/muxers.texi
> > +++ b/doc/muxers.texi
> > @@ -275,6 +275,9 @@ of the adaptation sets and a,b,c,d and e are the indices of the mapped streams.
> >  To map all video (or audio) streams to an AdaptationSet, "v" (or "a") can be used as stream identifier instead of IDs.
> >
> >  When no assignment is defined, this defaults to an AdaptationSet for each stream.
> > +
> > +Optional syntax is "id=x,descriptor=descriptor_str,streams=a,b,c id=y,streams=d,e" and so on, descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015.
> > +And descriptor_str must be a properly formatted XML element, which is encoded by base64.
> Two comments:
> 1. Please provide an example here. So that it is easier for people to understand
For an example of using a descriptor in a VR tiled video application,
this short, interesting video
https://www.hhi.fraunhofer.de/en/departments/vca/research-groups/multimedia-communications/research-topics/mpeg-omaf.html
is more intuitive than a textual description.
Next, how does DASH package media data for tile-based streaming?
See ISO/IEC 23009-1:2014/Amd.2:2015. For example, the descriptor
string <SupplementalProperty schemeIdUri="urn:mpeg:dash:srd:2014"
value="0,0,0,1,1,2,2"/> indicates that the AdaptationSet is
the top-left corner tile of a full video divided into 2x2 tiles.
Finally, how can the FFmpeg DASH muxer be used to generate tile-based streaming?
We split the video into NxN tiles and insert the descriptor element into
each tile's AdaptationSet in the MPD.
For example, the pseudo ffmpeg command {-adaptation_sets
"id=0,descriptor=PFN1cHBsZW1lbnRhbFByb3BlcnR5IHNjaGVtZUlkVXJpPSJ1cm46bXBlZzpkYXNoOnNyZDoyMDE0IiB2YWx1ZT0iMCwwLDAsMSwxLDIsMiIvPg==,streams=v"}
will
insert descriptor string <SupplementalProperty
schemeIdUri="urn:mpeg:dash:srd:2014" value="0,0,0,1,1,2,2"/> like
below
<!-- top-left tile of the full video divided into 2x2 tiles -->
<AdaptationSet segmentAlignment="true" subsegmentAlignment="true"
subsegmentStartsWithSAP="1">
    <SupplementalProperty schemeIdUri="urn:mpeg:dash:srd:2014"
value="0,0,0,1,1,2,2"/>
    <Representation mimeType="video/mp4" codecs="avc1.42c00d"
width="640" height="360" bandwidth="218284" startWithSAP="1">
      ...
    </Representation>
</AdaptationSet>

In addition to VR applications, zoomed video part can also be
indicated by descriptor.
> 2. Why do we need this to be base64 encoded? What is the use-case where a normal string doesn't work?
The parser code uses commas and spaces as separators. An unencoded
descriptor string like <SupplementalProperty
schemeIdUri="urn:mpeg:dash:srd:2014" value="0,0,0,1,1,2,2"/> contains
commas and spaces, which would break the parsing.
> >  @item timeout @var{timeout}
> >  Set timeout for socket I/O operations. Applicable only for HTTP output.
> >  @item index_correction @var{index_correction}
> > diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
> > index b25afb4..a48031c 100644
> > --- a/libavformat/dashenc.c
> > +++ b/libavformat/dashenc.c
> > @@ -34,6 +34,7 @@
> >  #include "libavutil/rational.h"
> >  #include "libavutil/time.h"
> >  #include "libavutil/time_internal.h"
> > +#include "libavutil/base64.h"
> >
> >  #include "avc.h"
> >  #include "avformat.h"
> > @@ -68,6 +69,7 @@ typedef struct Segment {
> >
> >  typedef struct AdaptationSet {
> >      char id[10];
> > +    char descriptor[1024];
> Please change this char * and allocate it dynamically. I understand there are some legacy code in dashenc using this 1024 length.
> But at least new code should follow dynamic allocation.
Agree, will fix it
> >      enum AVMediaType media_type;
> >      AVDictionary *metadata;
> >      AVRational min_frame_rate, max_frame_rate;
> > @@ -748,7 +750,8 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
> >      role = av_dict_get(as->metadata, "role", NULL, 0);
> >      if (role)
> >          avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);
> > -
> > +    if (strlen(as->descriptor))
> > +        avio_printf(out, "\t\t\t%s\n", as->descriptor);
> >      for (i = 0; i < s->nb_streams; i++) {
> >          OutputStream *os = &c->streams[i];
> >          char bandwidth_str[64] = {'\0'};
> > @@ -820,7 +823,7 @@ static int parse_adaptation_sets(AVFormatContext *s)
> >  {
> >      DASHContext *c = s->priv_data;
> >      const char *p = c->adaptation_sets;
> > -    enum { new_set, parse_id, parsing_streams } state;
> > +    enum { new_set, parse_id, parsing_streams, parse_descriptor } state;
> >      AdaptationSet *as;
> >      int i, n, ret;
> >
> > @@ -837,6 +840,9 @@ static int parse_adaptation_sets(AVFormatContext *s)
> >      }
> >
> >      // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
> > +    // option id=0,descriptor=descriptor_str,streams=0,1,2 and so on
> > +    // descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015
> > +    // descriptor_str must be a properly formatted XML element, encoded by base64.
> >      state = new_set;
> >      while (*p) {
> >          if (*p == ' ') {
> > @@ -854,7 +860,30 @@ static int parse_adaptation_sets(AVFormatContext *s)
> >              if (*p)
> >                  p++;
> >              state = parse_id;
> > -        } else if (state == parse_id && av_strstart(p, "streams=", &p)) {
> > +        } else if (state == parse_id && av_strstart(p, "descriptor=", &p)) {
> > +            char *encode_str, *decode_str;
> > +            int decode_size, ret;
> > +
> > +            n = strcspn(p, ",");
> > +            encode_str = av_strndup(p, n);
> > +            decode_size = AV_BASE64_DECODE_SIZE(n);
> > +            decode_str = av_mallocz(decode_size);
> > +            if (decode_str) {
> > +                ret = av_base64_decode(decode_str, encode_str, decode_size);
> > +                if (ret >= 0)
> > +                    snprintf(as->descriptor, sizeof(as->descriptor), "%.*s", decode_size, decode_str);
> > +                else
> > +                    av_log(s, AV_LOG_WARNING, "descriptor string is invalid base64 encode\n");
> > +            } else {
> > +                av_log(s, AV_LOG_WARNING, "av_mallocz failed, will not parse descriptor\n");
> > +            }
> > +            p += n;
> > +            if (*p)
> > +                p++;
> > +            state = parse_descriptor;
> > +            av_freep(&encode_str);
> > +            av_freep(&decode_str);
> > +        } else if ((state == parse_id || state == parse_descriptor) && av_strstart(p, "streams=", &p)) { //descriptor is optional
> >              state = parsing_streams;
> >          } else if (state == parsing_streams) {
> >              AdaptationSet *as = &c->as[c->nb_as - 1];
> Regards,
> Karthick
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Jeyapal, Karthick July 18, 2019, 5:22 a.m. UTC | #4
On 7/17/19 10:21 AM, Tao Zhang wrote:
> Jeyapal, Karthick <kjeyapal@akamai.com> 于2019年7月17日周三 上午10:46写道:

>>

>>

>> On 7/15/19 8:41 AM, leozhang wrote:

>>> change history:

>>> 1. remove unnecessary cast.

>>> 2. add some braces.

>>>

>>> Please comment, Thanks

>> Thanks for sending the patch. Please find some of my comments inlined below.

> Thanks for your comments. I made some changes below. Please review it, thanks.

>>>

>>> Signed-off-by: leozhang <leozhang@qiyi.com>

>>> ---

>>>  doc/muxers.texi       |  3 +++

>>>  libavformat/dashenc.c | 35 ++++++++++++++++++++++++++++++++---

>>>  2 files changed, 35 insertions(+), 3 deletions(-)

>>>

>>> diff --git a/doc/muxers.texi b/doc/muxers.texi

>>> index b109297..ac06ad2 100644

>>> --- a/doc/muxers.texi

>>> +++ b/doc/muxers.texi

>>> @@ -275,6 +275,9 @@ of the adaptation sets and a,b,c,d and e are the indices of the mapped streams.

>>>  To map all video (or audio) streams to an AdaptationSet, "v" (or "a") can be used as stream identifier instead of IDs.

>>>

>>>  When no assignment is defined, this defaults to an AdaptationSet for each stream.

>>> +

>>> +Optional syntax is "id=x,descriptor=descriptor_str,streams=a,b,c id=y,streams=d,e" and so on, descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015.

>>> +And descriptor_str must be a properly formatted XML element, which is encoded by base64.

>> Two comments:

>> 1. Please provide an example here. So that it is easier for people to understand

> For the instance using descriptor in VR tiled video application,

> this short interesting video

> https://www.hhi.fraunhofer.de/en/departments/vca/research-groups/multimedia-communications/research-topics/mpeg-omaf.html

> is more intuitive than a textual description.

> Then, how DASH pack media data in tile based streaming?

> Refer ISO/IEC 23009-1:2014/Amd.2:2015, for example, the descriptor

> string <SupplementalProperty schemeIdUri="urn:mpeg:dash:srd:2014"

> value="0,0,0,1,1,2,2"/> indicates that AdaptationSet is

> the top-left corner tile of full video divided into 2x2 tiles.

> Finally, how to use FFmpeg DASH muxer to generate the tile based streaming?

> We split the video by NxN tiles, insert descriptor syntax together

> with AdaptationSet in MPD.

> For example, the pseudo ffmpeg command {-adaptation_sets

> "id=0,descriptor=PFN1cHBsZW1lbnRhbFByb3BlcnR5IHNjaGVtZUlkVXJpPSJ1cm46bXBlZzpkYXNoOnNyZDoyMDE0IiB2YWx1ZT0iMCwwLDAsMSwxLDIsMiIvPg==,streams=v"}

> will

> insert descriptor string <SupplementalProperty

> schemeIdUri="urn:mpeg:dash:srd:2014" value="0,0,0,1,1,2,2"/> like

> below

> <!-- top-left tile of the full video divided into 2x2 tiles -->

> <AdaptationSet segmentAlignment="true" subsegmentAlignment="true"

> subsegmentStartsWithSAP="1">

>     <SupplementalProperty schemeIdUri="urn:mpeg:dash:srd:2014"

> value="0,0,0,1,1,2,2"/>

>     <Representation mimeType="video/mp4" codecs="avc1.42c00d"

> width="640" height="360" bandwidth="218284" startWithSAP="1">

>       ...

>     </Representation>

> </AdaptationSet>

>

> In addition to VR applications, zoomed video part can also be

> indicated by descriptor.

Thanks for the detailed explanation. Please add the following (or a similar) line to muxers.texi.
For example,
-adaptation_sets "id=0,descriptor=<SupplementalProperty schemeIdUri=\"urn:mpeg:dash:srd:2014\" value=\"0,0,0,1,1,2,2\"/>,streams=v"
>> 2. Why do we need this to be base64 encoded? What is the use-case where a normal string doesn't work?

> The parser code used comma and space as separator. The unencrypted

> descriptor string like <SupplementalProperty

> schemeIdUri="urn:mpeg:dash:srd:2014" value="0,0,0,1,1,2,2"/> contains

> comma and space, which disturbs the normal parse result.

Space is used as a separator only outside the if..else.. conditions, so spaces inside the descriptor string will not be treated as separators.
Regarding the comma separator, you can change the logic for the descriptor alone, as the relevant code is inside an "else if" for the descriptor anyway.
You can look for the ">" symbol as the terminating char, instead of looking for a separator. Also add a note in muxers.texi mentioning that the descriptor string should be a self-closing XML tag.
It is easier for users to pass a self-closing XML tag as a command-line input than a base64-encoded string.
>>>  @item timeout @var{timeout}

>>>  Set timeout for socket I/O operations. Applicable only for HTTP output.

>>>  @item index_correction @var{index_correction}

>>> diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c

>>> index b25afb4..a48031c 100644

>>> --- a/libavformat/dashenc.c

>>> +++ b/libavformat/dashenc.c

>>> @@ -34,6 +34,7 @@

>>>  #include "libavutil/rational.h"

>>>  #include "libavutil/time.h"

>>>  #include "libavutil/time_internal.h"

>>> +#include "libavutil/base64.h"

>>>

>>>  #include "avc.h"

>>>  #include "avformat.h"

>>> @@ -68,6 +69,7 @@ typedef struct Segment {

>>>

>>>  typedef struct AdaptationSet {

>>>      char id[10];

>>> +    char descriptor[1024];

>> Please change this char * and allocate it dynamically. I understand there are some legacy code in dashenc using this 1024 length.

>> But at least new code should follow dynamic allocation.

> Agree, will fix it

>>>      enum AVMediaType media_type;

>>>      AVDictionary *metadata;

>>>      AVRational min_frame_rate, max_frame_rate;

>>> @@ -748,7 +750,8 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind

>>>      role = av_dict_get(as->metadata, "role", NULL, 0);

>>>      if (role)

>>>          avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);

>>> -

>>> +    if (strlen(as->descriptor))

>>> +        avio_printf(out, "\t\t\t%s\n", as->descriptor);

>>>      for (i = 0; i < s->nb_streams; i++) {

>>>          OutputStream *os = &c->streams[i];

>>>          char bandwidth_str[64] = {'\0'};

>>> @@ -820,7 +823,7 @@ static int parse_adaptation_sets(AVFormatContext *s)

>>>  {

>>>      DASHContext *c = s->priv_data;

>>>      const char *p = c->adaptation_sets;

>>> -    enum { new_set, parse_id, parsing_streams } state;

>>> +    enum { new_set, parse_id, parsing_streams, parse_descriptor } state;

>>>      AdaptationSet *as;

>>>      int i, n, ret;

>>>

>>> @@ -837,6 +840,9 @@ static int parse_adaptation_sets(AVFormatContext *s)

>>>      }

>>>

>>>      // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on

>>> +    // option id=0,descriptor=descriptor_str,streams=0,1,2 and so on

>>> +    // descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015

>>> +    // descriptor_str must be a properly formatted XML element, encoded by base64.

>>>      state = new_set;

>>>      while (*p) {

>>>          if (*p == ' ') {

>>> @@ -854,7 +860,30 @@ static int parse_adaptation_sets(AVFormatContext *s)

>>>              if (*p)

>>>                  p++;

>>>              state = parse_id;

>>> -        } else if (state == parse_id && av_strstart(p, "streams=", &p)) {

>>> +        } else if (state == parse_id && av_strstart(p, "descriptor=", &p)) {

>>> +            char *encode_str, *decode_str;

>>> +            int decode_size, ret;

>>> +

>>> +            n = strcspn(p, ",");

>>> +            encode_str = av_strndup(p, n);

>>> +            decode_size = AV_BASE64_DECODE_SIZE(n);

>>> +            decode_str = av_mallocz(decode_size);

>>> +            if (decode_str) {

>>> +                ret = av_base64_decode(decode_str, encode_str, decode_size);

>>> +                if (ret >= 0)

>>> +                    snprintf(as->descriptor, sizeof(as->descriptor), "%.*s", decode_size, decode_str);

>>> +                else

>>> +                    av_log(s, AV_LOG_WARNING, "descriptor string is invalid base64 encode\n");

>>> +            } else {

>>> +                av_log(s, AV_LOG_WARNING, "av_mallocz failed, will not parse descriptor\n");

>>> +            }

>>> +            p += n;

>>> +            if (*p)

>>> +                p++;

>>> +            state = parse_descriptor;

>>> +            av_freep(&encode_str);

>>> +            av_freep(&decode_str);

>>> +        } else if ((state == parse_id || state == parse_descriptor) && av_strstart(p, "streams=", &p)) { //descriptor is optional

>>>              state = parsing_streams;

>>>          } else if (state == parsing_streams) {

>>>              AdaptationSet *as = &c->as[c->nb_as - 1];

>> Regards,

>> Karthick

>>

>> _______________________________________________

>> ffmpeg-devel mailing list

>> ffmpeg-devel@ffmpeg.org

>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

>>

>> To unsubscribe, visit link above, or email

>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

>

> To unsubscribe, visit link above, or email

> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox

Patch

diff --git a/doc/muxers.texi b/doc/muxers.texi
index b109297..ac06ad2 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -275,6 +275,9 @@  of the adaptation sets and a,b,c,d and e are the indices of the mapped streams.
 To map all video (or audio) streams to an AdaptationSet, "v" (or "a") can be used as stream identifier instead of IDs.
 
 When no assignment is defined, this defaults to an AdaptationSet for each stream.
+
+Optional syntax is "id=x,descriptor=descriptor_str,streams=a,b,c id=y,streams=d,e" and so on, descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015.
+And descriptor_str must be a properly formatted XML element, which is encoded by base64.
 @item timeout @var{timeout}
 Set timeout for socket I/O operations. Applicable only for HTTP output.
 @item index_correction @var{index_correction}
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index b25afb4..a48031c 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -34,6 +34,7 @@ 
 #include "libavutil/rational.h"
 #include "libavutil/time.h"
 #include "libavutil/time_internal.h"
+#include "libavutil/base64.h"
 
 #include "avc.h"
 #include "avformat.h"
@@ -68,6 +69,7 @@  typedef struct Segment {
 
 typedef struct AdaptationSet {
     char id[10];
+    char descriptor[1024];
     enum AVMediaType media_type;
     AVDictionary *metadata;
     AVRational min_frame_rate, max_frame_rate;
@@ -748,7 +750,8 @@  static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
     role = av_dict_get(as->metadata, "role", NULL, 0);
     if (role)
         avio_printf(out, "\t\t\t<Role schemeIdUri=\"urn:mpeg:dash:role:2011\" value=\"%s\"/>\n", role->value);
-
+    if (strlen(as->descriptor))
+        avio_printf(out, "\t\t\t%s\n", as->descriptor);
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
         char bandwidth_str[64] = {'\0'};
@@ -820,7 +823,7 @@  static int parse_adaptation_sets(AVFormatContext *s)
 {
     DASHContext *c = s->priv_data;
     const char *p = c->adaptation_sets;
-    enum { new_set, parse_id, parsing_streams } state;
+    enum { new_set, parse_id, parsing_streams, parse_descriptor } state;
     AdaptationSet *as;
     int i, n, ret;
 
@@ -837,6 +840,9 @@  static int parse_adaptation_sets(AVFormatContext *s)
     }
 
     // syntax id=0,streams=0,1,2 id=1,streams=3,4 and so on
+    // option id=0,descriptor=descriptor_str,streams=0,1,2 and so on
+    // descriptor is useful to the scheme defined by ISO/IEC 23009-1:2014/Amd.2:2015
+    // descriptor_str must be a properly formatted XML element, encoded by base64.
     state = new_set;
     while (*p) {
         if (*p == ' ') {
@@ -854,7 +860,30 @@  static int parse_adaptation_sets(AVFormatContext *s)
             if (*p)
                 p++;
             state = parse_id;
-        } else if (state == parse_id && av_strstart(p, "streams=", &p)) {
+        } else if (state == parse_id && av_strstart(p, "descriptor=", &p)) {
+            char *encode_str, *decode_str;
+            int decode_size, ret;
+
+            n = strcspn(p, ",");
+            encode_str = av_strndup(p, n);
+            decode_size = AV_BASE64_DECODE_SIZE(n);
+            decode_str = av_mallocz(decode_size);
+            if (decode_str) {
+                ret = av_base64_decode(decode_str, encode_str, decode_size);
+                if (ret >= 0)
+                    snprintf(as->descriptor, sizeof(as->descriptor), "%.*s", decode_size, decode_str);
+                else
+                    av_log(s, AV_LOG_WARNING, "descriptor string is invalid base64 encode\n");
+            } else {
+                av_log(s, AV_LOG_WARNING, "av_mallocz failed, will not parse descriptor\n");
+            }
+            p += n;
+            if (*p)
+                p++;
+            state = parse_descriptor;
+            av_freep(&encode_str);
+            av_freep(&decode_str);
+        } else if ((state == parse_id || state == parse_descriptor) && av_strstart(p, "streams=", &p)) { //descriptor is optional 
             state = parsing_streams;
         } else if (state == parsing_streams) {
             AdaptationSet *as = &c->as[c->nb_as - 1];