diff mbox

[FFmpeg-devel] avcodec/vp9_parser: export profile and pixel format

Message ID 20181003174921.1176-1-jamrial@gmail.com
State New
Headers show

Commit Message

James Almer Oct. 3, 2018, 5:49 p.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavcodec/vp9_parser.c | 82 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 2 deletions(-)

Comments

chcunningham@chromium.org Oct. 24, 2018, 6:12 p.m. UTC | #1
On Wed, Oct 3, 2018 at 10:49 AM James Almer <jamrial@gmail.com> wrote:

> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavcodec/vp9_parser.c | 82 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 80 insertions(+), 2 deletions(-)
>
> diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
> index 9531f34a32..d4110f20bf 100644
> --- a/libavcodec/vp9_parser.c
> +++ b/libavcodec/vp9_parser.c
> @@ -23,36 +23,114 @@
>
>  #include "libavutil/intreadwrite.h"
>  #include "libavcodec/get_bits.h"
> +#include "libavcodec/internal.h"
>  #include "parser.h"
>
> +#define VP9_SYNCCODE 0x498342
> +
> +static int read_colorspace_details(AVCodecParserContext *ctx,
> AVCodecContext *avctx,
> +                                   GetBitContext *gb)
> +{
> +    static const enum AVColorSpace colorspaces[8] = {
> +        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709,
> AVCOL_SPC_SMPTE170M,
> +        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED,
> AVCOL_SPC_RGB,
> +    };
> +    int colorspace, bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(gb);
> // 0:8, 1:10, 2:12
> +
> +    colorspace = colorspaces[get_bits(gb, 3)];
> +    if (colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
> +        static const enum AVPixelFormat pix_fmt_rgb[3] = {
> +            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
> +        };
> +        if (avctx->profile & 1) {
> +            if (get_bits1(gb)) // reserved bit
> +                return AVERROR_INVALIDDATA;
> +        } else
> +            return AVERROR_INVALIDDATA;
> +        ctx->format = pix_fmt_rgb[bits];
> +    } else {
> +        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /*
> h */] = {
> +            { { AV_PIX_FMT_YUV444P,   AV_PIX_FMT_YUV422P },
> +              { AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV420P } },
> +            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
> +              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
> +            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
> +              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
> +        };
> +        if (avctx->profile & 1) {
> +            int ss_h, ss_v, format;
> +
> +            ss_h = get_bits1(gb);
> +            ss_v = get_bits1(gb);
> +            format = pix_fmt_for_ss[bits][ss_v][ss_h];
> +            if (format == AV_PIX_FMT_YUV420P)
> +                // YUV 4:2:0 not supported in profiles 1 and 3
> +                return AVERROR_INVALIDDATA;
> +            else if (get_bits1(gb)) // color details reserved bit
> +                return AVERROR_INVALIDDATA;
> +            ctx->format = format;
> +        } else {
> +            ctx->format = pix_fmt_for_ss[bits][1][1];
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int parse(AVCodecParserContext *ctx,
>                   AVCodecContext *avctx,
>                   const uint8_t **out_data, int *out_size,
>                   const uint8_t *data, int size)
>  {
>      GetBitContext gb;
> -    int res, profile, keyframe;
> +    int res, profile, keyframe, invisible, errorres;
>
>      *out_data = data;
>      *out_size = size;
>
> -    if ((res = init_get_bits8(&gb, data, size)) < 0)
> +    if (!size || (res = init_get_bits8(&gb, data, size)) < 0)
>          return size; // parsers can't return errors
>      get_bits(&gb, 2); // frame marker
>      profile  = get_bits1(&gb);
>      profile |= get_bits1(&gb) << 1;
>      if (profile == 3) profile += get_bits1(&gb);
> +    if (profile > 3)
> +        return size;
>
> +    avctx->profile = profile;
>      if (get_bits1(&gb)) {
>          keyframe = 0;
> +        skip_bits(&gb, 3);
>      } else {
>          keyframe  = !get_bits1(&gb);
>      }
>
> +    invisible = !get_bits1(&gb);
> +    errorres  = get_bits1(&gb);
> +
>      if (!keyframe) {
> +        int intraonly = invisible ? get_bits1(&gb) : 0;
> +        if (!errorres)
> +            skip_bits(&gb, 2);
> +        if (intraonly) {
> +            if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
> +                return size;
> +            if (profile >= 1) {
> +                if ((read_colorspace_details(ctx, avctx, &gb)) < 0)
> +                    return size;
> +            } else {
> +                ctx->format = AV_PIX_FMT_YUV420P;
> +            }
> +        }
> +
>          ctx->pict_type = AV_PICTURE_TYPE_P;
>          ctx->key_frame = 0;
>      } else {
> +        if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
> +            return size;
> +        if (read_colorspace_details(ctx, avctx, &gb) < 0)
> +            return size;
> +
>          ctx->pict_type = AV_PICTURE_TYPE_I;
>          ctx->key_frame = 1;
>      }
> --
> 2.19.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Friendly ping for review.
James Almer Oct. 25, 2018, 11:50 p.m. UTC | #2
On 10/24/2018 3:12 PM, Chris Cunningham wrote:
> On Wed, Oct 3, 2018 at 10:49 AM James Almer <jamrial@gmail.com> wrote:
> 
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>  libavcodec/vp9_parser.c | 82 ++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 80 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
>> index 9531f34a32..d4110f20bf 100644
>> --- a/libavcodec/vp9_parser.c
>> +++ b/libavcodec/vp9_parser.c
>> @@ -23,36 +23,114 @@
>>
>>  #include "libavutil/intreadwrite.h"
>>  #include "libavcodec/get_bits.h"
>> +#include "libavcodec/internal.h"
>>  #include "parser.h"
>>
>> +#define VP9_SYNCCODE 0x498342
>> +
>> +static int read_colorspace_details(AVCodecParserContext *ctx,
>> AVCodecContext *avctx,
>> +                                   GetBitContext *gb)
>> +{
>> +    static const enum AVColorSpace colorspaces[8] = {
>> +        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709,
>> AVCOL_SPC_SMPTE170M,
>> +        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED,
>> AVCOL_SPC_RGB,
>> +    };
>> +    int colorspace, bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(gb);
>> // 0:8, 1:10, 2:12
>> +
>> +    colorspace = colorspaces[get_bits(gb, 3)];
>> +    if (colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
>> +        static const enum AVPixelFormat pix_fmt_rgb[3] = {
>> +            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
>> +        };
>> +        if (avctx->profile & 1) {
>> +            if (get_bits1(gb)) // reserved bit
>> +                return AVERROR_INVALIDDATA;
>> +        } else
>> +            return AVERROR_INVALIDDATA;
>> +        ctx->format = pix_fmt_rgb[bits];
>> +    } else {
>> +        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /*
>> h */] = {
>> +            { { AV_PIX_FMT_YUV444P,   AV_PIX_FMT_YUV422P },
>> +              { AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV420P } },
>> +            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
>> +              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
>> +            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
>> +              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
>> +        };
>> +        if (avctx->profile & 1) {
>> +            int ss_h, ss_v, format;
>> +
>> +            ss_h = get_bits1(gb);
>> +            ss_v = get_bits1(gb);
>> +            format = pix_fmt_for_ss[bits][ss_v][ss_h];
>> +            if (format == AV_PIX_FMT_YUV420P)
>> +                // YUV 4:2:0 not supported in profiles 1 and 3
>> +                return AVERROR_INVALIDDATA;
>> +            else if (get_bits1(gb)) // color details reserved bit
>> +                return AVERROR_INVALIDDATA;
>> +            ctx->format = format;
>> +        } else {
>> +            ctx->format = pix_fmt_for_ss[bits][1][1];
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static int parse(AVCodecParserContext *ctx,
>>                   AVCodecContext *avctx,
>>                   const uint8_t **out_data, int *out_size,
>>                   const uint8_t *data, int size)
>>  {
>>      GetBitContext gb;
>> -    int res, profile, keyframe;
>> +    int res, profile, keyframe, invisible, errorres;
>>
>>      *out_data = data;
>>      *out_size = size;
>>
>> -    if ((res = init_get_bits8(&gb, data, size)) < 0)
>> +    if (!size || (res = init_get_bits8(&gb, data, size)) < 0)
>>          return size; // parsers can't return errors
>>      get_bits(&gb, 2); // frame marker
>>      profile  = get_bits1(&gb);
>>      profile |= get_bits1(&gb) << 1;
>>      if (profile == 3) profile += get_bits1(&gb);
>> +    if (profile > 3)
>> +        return size;
>>
>> +    avctx->profile = profile;
>>      if (get_bits1(&gb)) {
>>          keyframe = 0;
>> +        skip_bits(&gb, 3);
>>      } else {
>>          keyframe  = !get_bits1(&gb);
>>      }
>>
>> +    invisible = !get_bits1(&gb);
>> +    errorres  = get_bits1(&gb);
>> +
>>      if (!keyframe) {
>> +        int intraonly = invisible ? get_bits1(&gb) : 0;
>> +        if (!errorres)
>> +            skip_bits(&gb, 2);
>> +        if (intraonly) {
>> +            if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
>> +                return size;
>> +            if (profile >= 1) {
>> +                if ((read_colorspace_details(ctx, avctx, &gb)) < 0)
>> +                    return size;
>> +            } else {
>> +                ctx->format = AV_PIX_FMT_YUV420P;
>> +            }
>> +        }
>> +
>>          ctx->pict_type = AV_PICTURE_TYPE_P;
>>          ctx->key_frame = 0;
>>      } else {
>> +        if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
>> +            return size;
>> +        if (read_colorspace_details(ctx, avctx, &gb) < 0)
>> +            return size;
>> +
>>          ctx->pict_type = AV_PICTURE_TYPE_I;
>>          ctx->key_frame = 1;
>>      }
>> --
>> 2.19.0
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> 
> Friendly ping for review.

Dropping this as it's not correct. It's not taking into account
show_existing_frame frames (And reading bits way past the header bits),
or superframes, where it may be parsing an invisible frame.

Pushed only the part setting profile.
chcunningham@chromium.org Oct. 29, 2018, 10:33 p.m. UTC | #3
Thanks for pushing the profile part. Turns out I'm also interested in pixel
format, but I follow your comments about dropping the rest of patch. With
the code as-is, how busted are we when parsing a super frame. Does it
correctly grab the profile of the first frame?

Chrome actually has a fairly complete VP9 parser
<https://cs.chromium.org/chromium/src/media/filters/vp9_parser.cc?rcl=adf255ef1da95255174643a6c1482e4305f877fd&l=478>
but the CodecPrivate is no longer set in extradata for VP9 so we don't
(AFAIK) have easy access to the frame header. Would you be open to
surfacing codec private via extradata (reverting this change
<http://ffmpeg.org/pipermail/ffmpeg-devel/2018-February/225548.html>)?

On Thu, Oct 25, 2018 at 4:50 PM James Almer <jamrial@gmail.com> wrote:

> On 10/24/2018 3:12 PM, Chris Cunningham wrote:
> > On Wed, Oct 3, 2018 at 10:49 AM James Almer <jamrial@gmail.com> wrote:
> >
> >> Signed-off-by: James Almer <jamrial@gmail.com>
> >> ---
> >>  libavcodec/vp9_parser.c | 82 ++++++++++++++++++++++++++++++++++++++++-
> >>  1 file changed, 80 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
> >> index 9531f34a32..d4110f20bf 100644
> >> --- a/libavcodec/vp9_parser.c
> >> +++ b/libavcodec/vp9_parser.c
> >> @@ -23,36 +23,114 @@
> >>
> >>  #include "libavutil/intreadwrite.h"
> >>  #include "libavcodec/get_bits.h"
> >> +#include "libavcodec/internal.h"
> >>  #include "parser.h"
> >>
> >> +#define VP9_SYNCCODE 0x498342
> >> +
> >> +static int read_colorspace_details(AVCodecParserContext *ctx,
> >> AVCodecContext *avctx,
> >> +                                   GetBitContext *gb)
> >> +{
> >> +    static const enum AVColorSpace colorspaces[8] = {
> >> +        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709,
> >> AVCOL_SPC_SMPTE170M,
> >> +        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED,
> >> AVCOL_SPC_RGB,
> >> +    };
> >> +    int colorspace, bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(gb);
> >> // 0:8, 1:10, 2:12
> >> +
> >> +    colorspace = colorspaces[get_bits(gb, 3)];
> >> +    if (colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
> >> +        static const enum AVPixelFormat pix_fmt_rgb[3] = {
> >> +            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
> >> +        };
> >> +        if (avctx->profile & 1) {
> >> +            if (get_bits1(gb)) // reserved bit
> >> +                return AVERROR_INVALIDDATA;
> >> +        } else
> >> +            return AVERROR_INVALIDDATA;
> >> +        ctx->format = pix_fmt_rgb[bits];
> >> +    } else {
> >> +        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2
> /*
> >> h */] = {
> >> +            { { AV_PIX_FMT_YUV444P,   AV_PIX_FMT_YUV422P },
> >> +              { AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV420P } },
> >> +            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
> >> +              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
> >> +            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
> >> +              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
> >> +        };
> >> +        if (avctx->profile & 1) {
> >> +            int ss_h, ss_v, format;
> >> +
> >> +            ss_h = get_bits1(gb);
> >> +            ss_v = get_bits1(gb);
> >> +            format = pix_fmt_for_ss[bits][ss_v][ss_h];
> >> +            if (format == AV_PIX_FMT_YUV420P)
> >> +                // YUV 4:2:0 not supported in profiles 1 and 3
> >> +                return AVERROR_INVALIDDATA;
> >> +            else if (get_bits1(gb)) // color details reserved bit
> >> +                return AVERROR_INVALIDDATA;
> >> +            ctx->format = format;
> >> +        } else {
> >> +            ctx->format = pix_fmt_for_ss[bits][1][1];
> >> +        }
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>  static int parse(AVCodecParserContext *ctx,
> >>                   AVCodecContext *avctx,
> >>                   const uint8_t **out_data, int *out_size,
> >>                   const uint8_t *data, int size)
> >>  {
> >>      GetBitContext gb;
> >> -    int res, profile, keyframe;
> >> +    int res, profile, keyframe, invisible, errorres;
> >>
> >>      *out_data = data;
> >>      *out_size = size;
> >>
> >> -    if ((res = init_get_bits8(&gb, data, size)) < 0)
> >> +    if (!size || (res = init_get_bits8(&gb, data, size)) < 0)
> >>          return size; // parsers can't return errors
> >>      get_bits(&gb, 2); // frame marker
> >>      profile  = get_bits1(&gb);
> >>      profile |= get_bits1(&gb) << 1;
> >>      if (profile == 3) profile += get_bits1(&gb);
> >> +    if (profile > 3)
> >> +        return size;
> >>
> >> +    avctx->profile = profile;
> >>      if (get_bits1(&gb)) {
> >>          keyframe = 0;
> >> +        skip_bits(&gb, 3);
> >>      } else {
> >>          keyframe  = !get_bits1(&gb);
> >>      }
> >>
> >> +    invisible = !get_bits1(&gb);
> >> +    errorres  = get_bits1(&gb);
> >> +
> >>      if (!keyframe) {
> >> +        int intraonly = invisible ? get_bits1(&gb) : 0;
> >> +        if (!errorres)
> >> +            skip_bits(&gb, 2);
> >> +        if (intraonly) {
> >> +            if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
> >> +                return size;
> >> +            if (profile >= 1) {
> >> +                if ((read_colorspace_details(ctx, avctx, &gb)) < 0)
> >> +                    return size;
> >> +            } else {
> >> +                ctx->format = AV_PIX_FMT_YUV420P;
> >> +            }
> >> +        }
> >> +
> >>          ctx->pict_type = AV_PICTURE_TYPE_P;
> >>          ctx->key_frame = 0;
> >>      } else {
> >> +        if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
> >> +            return size;
> >> +        if (read_colorspace_details(ctx, avctx, &gb) < 0)
> >> +            return size;
> >> +
> >>          ctx->pict_type = AV_PICTURE_TYPE_I;
> >>          ctx->key_frame = 1;
> >>      }
> >> --
> >> 2.19.0
> >>
> >> _______________________________________________
> >> ffmpeg-devel mailing list
> >> ffmpeg-devel@ffmpeg.org
> >> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> >
> > Friendly ping for review.
>
> Dropping this as it's not correct. It's not taking into account
> show_existing_frame frames (And reading bits way past the header bits),
> or superframes, where it may be parsing an invisible frame.
>
> Pushed only the part setting profile.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
James Almer Oct. 29, 2018, 10:54 p.m. UTC | #4
On 10/29/2018 7:33 PM, Chris Cunningham wrote:
> Thanks for pushing the profile part. Turns out I'm also interested in
> pixel format, but I follow your comments about dropping the rest of
> patch. With the code as-is, how busted are we when parsing a super
> frame. Does it correctly grab the profile of the first frame?

It grabs the profile from the first frame in a super frame, which should
be safe since even if it's an invisible one it's not really going to be
different than the visible one, i think.

Also, when dealing with a super frame, to get other values like pixel
format and frame dimensions we'd have to make sure to parse the visible
frame, and if it's an inter frame also keep the reference frames around
to take said values from them.

> 
> Chrome actually has a fairly complete VP9 parser
> <https://cs.chromium.org/chromium/src/media/filters/vp9_parser.cc?rcl=adf255ef1da95255174643a6c1482e4305f877fd&l=478>
> but the CodecPrivate is no longer set in extradata for VP9 so we don't
> (AFAIK) have easy access to the frame header. Would you be open to
> surfacing codec private via extradata (reverting this change
> <http://ffmpeg.org/pipermail/ffmpeg-devel/2018-February/225548.html>)?

I don't think that change should be reverted, at least not without
further changes. Otherwise, WebM specific extradata (the one defined in
https://www.webmproject.org/docs/container) could make its way to other
containers. We'd have to add checks in supported muxers to ignore it
instead.

Also, what muxer currently writes that to CodecPrivate? I've looked at
Youtube samples and none has it.
chcunningham@chromium.org Nov. 2, 2018, 11:57 p.m. UTC | #5
> Also, when dealing with a super frame, to get other values like pixel
> format and frame dimensions we'd have to make sure to parse the visible
> frame, and if it's an inter frame also keep the reference frames around
> to take said values from them.

Could we avoid the challenges around reference and invisible frame by only
doing this parsing for key frames? IIUC these properties would only be
expected to change at the GOP boundary.

> I don't think that change should be reverted, at least not without
> further changes. Otherwise, WebM specific extradata (the one defined in
> https://www.webmproject.org/docs/container) could make its way to other
> containers. We'd have to add checks in supported muxers to ignore it
> instead.

Sorry, I forgot just how different webm is in this regard. We do have some
separate parsing code for the CodecPrivate, but you're also correct that it
is often not set.

> Also, what muxer currently writes that to CodecPrivate? I've looked at
> Youtube samples and none has it.

Maybe libWebM
https://github.com/webmproject/libwebm/blob/01c1d1d76f139345c442bfc8e61b4e1cba809059/common/hdr_util.h#L31

YouTube isn't doing it yet for VP9. But I think its required with AV1
content, so maybe we'll see it with new VP9 encodes too down the road.
Parsing the frame is definitely our best bet.

Chris
chcunningham@chromium.org Nov. 12, 2018, 10:48 p.m. UTC | #6
On Fri, Nov 2, 2018 at 4:57 PM Chris Cunningham <chcunningham@chromium.org>
wrote:

> > Also, when dealing with a super frame, to get other values like pixel
> > format and frame dimensions we'd have to make sure to parse the visible
> > frame, and if it's an inter frame also keep the reference frames around
> > to take said values from them.
>
> Could we avoid the challenges around reference and invisible frame by only
> doing this parsing for key frames? IIUC these properties would only be
> expected to change at the GOP boundary.
>

Friendly ping for feedback on this idea.

>
James Almer Nov. 16, 2018, 12:19 a.m. UTC | #7
On 11/2/2018 8:57 PM, Chris Cunningham wrote:
>> Also, when dealing with a super frame, to get other values like pixel
>> format and frame dimensions we'd have to make sure to parse the visible
>> frame, and if it's an inter frame also keep the reference frames around
>> to take said values from them.
> 
> Could we avoid the challenges around reference and invisible frame by only
> doing this parsing for key frames? IIUC these properties would only be
> expected to change at the GOP boundary.

Yes, that's an option. Setting all fields to undefined/unknown if it's
not a keyframe and stop parsing altogether.

Can a keyframe be contained within a superframe, for that matter? Or are
those guaranteed to be standalone?
diff mbox

Patch

diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
index 9531f34a32..d4110f20bf 100644
--- a/libavcodec/vp9_parser.c
+++ b/libavcodec/vp9_parser.c
@@ -23,36 +23,114 @@ 
 
 #include "libavutil/intreadwrite.h"
 #include "libavcodec/get_bits.h"
+#include "libavcodec/internal.h"
 #include "parser.h"
 
+#define VP9_SYNCCODE 0x498342
+
+static int read_colorspace_details(AVCodecParserContext *ctx, AVCodecContext *avctx,
+                                   GetBitContext *gb)
+{
+    static const enum AVColorSpace colorspaces[8] = {
+        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
+        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
+    };
+    int colorspace, bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(gb); // 0:8, 1:10, 2:12
+
+    colorspace = colorspaces[get_bits(gb, 3)];
+    if (colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
+        static const enum AVPixelFormat pix_fmt_rgb[3] = {
+            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
+        };
+        if (avctx->profile & 1) {
+            if (get_bits1(gb)) // reserved bit
+                return AVERROR_INVALIDDATA;
+        } else
+            return AVERROR_INVALIDDATA;
+        ctx->format = pix_fmt_rgb[bits];
+    } else {
+        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
+            { { AV_PIX_FMT_YUV444P,   AV_PIX_FMT_YUV422P },
+              { AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV420P } },
+            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
+              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
+            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
+              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
+        };
+        if (avctx->profile & 1) {
+            int ss_h, ss_v, format;
+
+            ss_h = get_bits1(gb);
+            ss_v = get_bits1(gb);
+            format = pix_fmt_for_ss[bits][ss_v][ss_h];
+            if (format == AV_PIX_FMT_YUV420P)
+                // YUV 4:2:0 not supported in profiles 1 and 3
+                return AVERROR_INVALIDDATA;
+            else if (get_bits1(gb)) // color details reserved bit
+                return AVERROR_INVALIDDATA;
+            ctx->format = format;
+        } else {
+            ctx->format = pix_fmt_for_ss[bits][1][1];
+        }
+    }
+
+    return 0;
+}
+
 static int parse(AVCodecParserContext *ctx,
                  AVCodecContext *avctx,
                  const uint8_t **out_data, int *out_size,
                  const uint8_t *data, int size)
 {
     GetBitContext gb;
-    int res, profile, keyframe;
+    int res, profile, keyframe, invisible, errorres;
 
     *out_data = data;
     *out_size = size;
 
-    if ((res = init_get_bits8(&gb, data, size)) < 0)
+    if (!size || (res = init_get_bits8(&gb, data, size)) < 0)
         return size; // parsers can't return errors
     get_bits(&gb, 2); // frame marker
     profile  = get_bits1(&gb);
     profile |= get_bits1(&gb) << 1;
     if (profile == 3) profile += get_bits1(&gb);
+    if (profile > 3)
+        return size;
 
+    avctx->profile = profile;
     if (get_bits1(&gb)) {
         keyframe = 0;
+        skip_bits(&gb, 3);
     } else {
         keyframe  = !get_bits1(&gb);
     }
 
+    invisible = !get_bits1(&gb);
+    errorres  = get_bits1(&gb);
+
     if (!keyframe) {
+        int intraonly = invisible ? get_bits1(&gb) : 0;
+        if (!errorres)
+            skip_bits(&gb, 2);
+        if (intraonly) {
+            if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
+                return size;
+            if (profile >= 1) {
+                if ((read_colorspace_details(ctx, avctx, &gb)) < 0)
+                    return size;
+            } else {
+                ctx->format = AV_PIX_FMT_YUV420P;
+            }
+        }
+
         ctx->pict_type = AV_PICTURE_TYPE_P;
         ctx->key_frame = 0;
     } else {
+        if (get_bits_long(&gb, 24) != VP9_SYNCCODE) // synccode
+            return size;
+        if (read_colorspace_details(ctx, avctx, &gb) < 0)
+            return size;
+
         ctx->pict_type = AV_PICTURE_TYPE_I;
         ctx->key_frame = 1;
     }