[FFmpeg-devel] lavc/vaapi-vp9: add support for profile 2 (bpp > 8)

Submitted by Mathieu Velten on Nov. 28, 2016, 9:22 p.m.

Details

Message ID 20161128212227.31911-1-matmaul@gmail.com
State New
Headers show

Commit Message

Mathieu Velten Nov. 28, 2016, 9:22 p.m.
---
 libavcodec/vaapi_vp9.c |  1 +
 libavcodec/vp9.c       | 32 +++++++++++++++++---------------
 libavcodec/vp9.h       |  1 +
 3 files changed, 19 insertions(+), 15 deletions(-)

Comments

Mark Thompson Nov. 29, 2016, 12:26 a.m.
On 28/11/16 21:22, Mathieu Velten wrote:
> ---
>  libavcodec/vaapi_vp9.c |  1 +
>  libavcodec/vp9.c       | 32 +++++++++++++++++---------------
>  libavcodec/vp9.h       |  1 +
>  3 files changed, 19 insertions(+), 15 deletions(-)

Nice :)

Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode).

This should probably be split into two patches, though - one for the generic vp9 hwaccel support, a second then enabling it for VAAPI.

> diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
> index b360dcb..9b3e81a 100644
> --- a/libavcodec/vaapi_vp9.c
> +++ b/libavcodec/vaapi_vp9.c
> @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext                 *avctx,
>      pp->first_partition_size = h->h.compressed_header_size;
>  
>      pp->profile = h->h.profile;
> +    pp->bit_depth = h->h.bpp;
>  
>      pp->filter_level = h->h.filter.level;
>      pp->sharpness_level = h->h.filter.sharpness;
> diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> index 0ec895a..ff526da 100644
> --- a/libavcodec/vp9.c
> +++ b/libavcodec/vp9.c
> @@ -68,7 +68,7 @@ typedef struct VP9Context {
>      ptrdiff_t y_stride, uv_stride;
>  
>      uint8_t ss_h, ss_v;
> -    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
> +    uint8_t last_bpp, bpp_index, bytesperpixel;
>      uint8_t last_keyframe;
>      // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal
>      // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads
> @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w, int h)
>          if ((res = ff_set_dimensions(ctx, w, h)) < 0)
>              return res;
>  
> -        if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
> +        if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
> +            s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
> +            s->pix_fmt == AV_PIX_FMT_YUV420P12) {
>  #if CONFIG_VP9_DXVA2_HWACCEL
>              *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
>  #endif

This is enabling it for DXVA2 and D3D11VA as well?  I'm guessing you probably didn't want to do that - I think it would be better with something more like <http://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavcodec/hevc.c;hb=HEAD#l350>.

> @@ -326,10 +328,10 @@ static int update_size(AVCodecContext *ctx, int w, int h)
>      av_freep(&s->b_base);
>      av_freep(&s->block_base);
>  
> -    if (s->bpp != s->last_bpp) {
> -        ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
> -        ff_videodsp_init(&s->vdsp, s->bpp);
> -        s->last_bpp = s->bpp;
> +    if (s->s.h.bpp != s->last_bpp) {
> +        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
> +        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
> +        s->last_bpp = s->s.h.bpp;
>      }
>  
>      return 0;
> @@ -458,8 +460,8 @@ static int read_colorspace_details(AVCodecContext *ctx)
>      int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
>  
>      s->bpp_index = bits;
> -    s->bpp = 8 + bits * 2;
> -    s->bytesperpixel = (7 + s->bpp) >> 3;
> +    s->s.h.bpp = 8 + bits * 2;
> +    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
>      ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
>      if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
>          static const enum AVPixelFormat pix_fmt_rgb[3] = {
> @@ -571,7 +573,7 @@ static int decode_frame_header(AVCodecContext *ctx,
>                      return res;
>              } else {
>                  s->ss_h = s->ss_v = 1;
> -                s->bpp = 8;
> +                s->s.h.bpp = 8;
>                  s->bpp_index = 0;
>                  s->bytesperpixel = 1;
>                  s->pix_fmt = AV_PIX_FMT_YUV420P;
> @@ -2278,7 +2280,7 @@ static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
>                                   const int16_t (*nb)[2], const int16_t *band_counts,
>                                   const int16_t *qmul)
>  {
> -    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
> +    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, cnt, eob, p,
>                                     nnz, scan, nb, band_counts, qmul);
>  }
>  
> @@ -2288,7 +2290,7 @@ static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
>                                     const int16_t (*nb)[2], const int16_t *band_counts,
>                                     const int16_t *qmul)
>  {
> -    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
> +    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, cnt, eob, p,
>                                     nnz, scan, nb, band_counts, qmul);
>  }
>  
> @@ -2479,7 +2481,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
>      int have_top = row > 0 || y > 0;
>      int have_left = col > s->tile_col_start || x > 0;
>      int have_right = x < w - 1;
> -    int bpp = s->bpp;
> +    int bpp = s->s.h.bpp;
>      static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
>          [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED },
>                                     { DC_127_PRED,          VERT_PRED } },
> @@ -3310,13 +3312,13 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
>          s->uv_stride = f->linesize[1];
>      }
>      if (b->intra) {
> -        if (s->bpp > 8) {
> +        if (s->s.h.bpp > 8) {
>              intra_recon_16bpp(ctx, yoff, uvoff);
>          } else {
>              intra_recon_8bpp(ctx, yoff, uvoff);
>          }
>      } else {
> -        if (s->bpp > 8) {
> +        if (s->s.h.bpp > 8) {
>              inter_recon_16bpp(ctx);
>          } else {
>              inter_recon_8bpp(ctx);
> @@ -4353,7 +4355,7 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
>      s->gf_fmt = ssrc->gf_fmt;
>      s->w = ssrc->w;
>      s->h = ssrc->h;
> -    s->bpp = ssrc->bpp;
> +    s->s.h.bpp = ssrc->s.h.bpp;
>      s->bpp_index = ssrc->bpp_index;
>      s->pix_fmt = ssrc->pix_fmt;
>      memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
> diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h
> index df5bd4d..89b1bd3 100644
> --- a/libavcodec/vp9.h
> +++ b/libavcodec/vp9.h
> @@ -137,6 +137,7 @@ typedef struct VP9Frame {
>  typedef struct VP9BitstreamHeader {
>      // bitstream header
>      uint8_t profile;
> +    uint8_t bpp;
>      uint8_t keyframe;
>      uint8_t invisible;
>      uint8_t errorres;
> 

Everything else (making bpp available more generally) looks good to me, but someone more familiar with this code should probably have a look too.

Thanks,

- Mark
Ronald S. Bultje Nov. 29, 2016, 3:11 a.m.
Hi,

On Mon, Nov 28, 2016 at 7:26 PM, Mark Thompson <sw@jkqxz.net> wrote:

> On 28/11/16 21:22, Mathieu Velten wrote:
> > ---
> >  libavcodec/vaapi_vp9.c |  1 +
> >  libavcodec/vp9.c       | 32 +++++++++++++++++---------------
> >  libavcodec/vp9.h       |  1 +
> >  3 files changed, 19 insertions(+), 15 deletions(-)
>
> Nice :)
>
> Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode).
>
> This should probably be split into two patches, though - one for the
> generic vp9 hwaccel support, a second then enabling it for VAAPI.
>
> > diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
> > index b360dcb..9b3e81a 100644
> > --- a/libavcodec/vaapi_vp9.c
> > +++ b/libavcodec/vaapi_vp9.c
> > @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext
>              *avctx,
> >      pp->first_partition_size = h->h.compressed_header_size;
> >
> >      pp->profile = h->h.profile;
> > +    pp->bit_depth = h->h.bpp;
> >
> >      pp->filter_level = h->h.filter.level;
> >      pp->sharpness_level = h->h.filter.sharpness;
> > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> > index 0ec895a..ff526da 100644
> > --- a/libavcodec/vp9.c
> > +++ b/libavcodec/vp9.c
> > @@ -68,7 +68,7 @@ typedef struct VP9Context {
> >      ptrdiff_t y_stride, uv_stride;
> >
> >      uint8_t ss_h, ss_v;
> > -    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
> > +    uint8_t last_bpp, bpp_index, bytesperpixel;
> >      uint8_t last_keyframe;
> >      // sb_cols/rows, rows/cols and last_fmt are used for allocating all
> internal
> >      // arrays, and are thus per-thread. w/h and gf_fmt are synced
> between threads
> > @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w,
> int h)
> >          if ((res = ff_set_dimensions(ctx, w, h)) < 0)
> >              return res;
> >
> > -        if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
> > +        if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
> > +            s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
> > +            s->pix_fmt == AV_PIX_FMT_YUV420P12) {
> >  #if CONFIG_VP9_DXVA2_HWACCEL
> >              *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
> >  #endif
>
> This is enabling it for DXVA2 and D3D11VA as well?  I'm guessing you
> probably didn't want to do that - I think it would be better with something
> more like <http://git.videolan.org/?p=ffmpeg.git;a=blob;f=
> libavcodec/hevc.c;hb=HEAD#l350>.


I'll let you guys figure out the details for this, but generic vp9.[ch]
changes are OK with me.

Thanks!
Ronald
Mathieu Velten Nov. 29, 2016, 8:11 a.m.
Thanks for your feedback.

I'll split it into two patches and enable it only for VAAPI.

Mathieu

Le mar. 29 nov. 2016 à 04:11, Ronald S. Bultje <rsbultje@gmail.com> a
écrit :

> Hi,
>
> On Mon, Nov 28, 2016 at 7:26 PM, Mark Thompson <sw@jkqxz.net> wrote:
>
> > On 28/11/16 21:22, Mathieu Velten wrote:
> > > ---
> > >  libavcodec/vaapi_vp9.c |  1 +
> > >  libavcodec/vp9.c       | 32 +++++++++++++++++---------------
> > >  libavcodec/vp9.h       |  1 +
> > >  3 files changed, 19 insertions(+), 15 deletions(-)
> >
> > Nice :)
> >
> > Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode).
> >
> > This should probably be split into two patches, though - one for the
> > generic vp9 hwaccel support, a second then enabling it for VAAPI.
> >
> > > diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
> > > index b360dcb..9b3e81a 100644
> > > --- a/libavcodec/vaapi_vp9.c
> > > +++ b/libavcodec/vaapi_vp9.c
> > > @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext
> >              *avctx,
> > >      pp->first_partition_size = h->h.compressed_header_size;
> > >
> > >      pp->profile = h->h.profile;
> > > +    pp->bit_depth = h->h.bpp;
> > >
> > >      pp->filter_level = h->h.filter.level;
> > >      pp->sharpness_level = h->h.filter.sharpness;
> > > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> > > index 0ec895a..ff526da 100644
> > > --- a/libavcodec/vp9.c
> > > +++ b/libavcodec/vp9.c
> > > @@ -68,7 +68,7 @@ typedef struct VP9Context {
> > >      ptrdiff_t y_stride, uv_stride;
> > >
> > >      uint8_t ss_h, ss_v;
> > > -    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
> > > +    uint8_t last_bpp, bpp_index, bytesperpixel;
> > >      uint8_t last_keyframe;
> > >      // sb_cols/rows, rows/cols and last_fmt are used for allocating
> all
> > internal
> > >      // arrays, and are thus per-thread. w/h and gf_fmt are synced
> > between threads
> > > @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w,
> > int h)
> > >          if ((res = ff_set_dimensions(ctx, w, h)) < 0)
> > >              return res;
> > >
> > > -        if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
> > > +        if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
> > > +            s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
> > > +            s->pix_fmt == AV_PIX_FMT_YUV420P12) {
> > >  #if CONFIG_VP9_DXVA2_HWACCEL
> > >              *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
> > >  #endif
> >
> > This is enabling it for DXVA2 and D3D11VA as well?  I'm guessing you
> > probably didn't want to do that - I think it would be better with
> something
> > more like <http://git.videolan.org/?p=ffmpeg.git;a=blob;f=
> > libavcodec/hevc.c;hb=HEAD#l350>.
>
>
> I'll let you guys figure out the details for this, but generic vp9.[ch]
> changes are OK with me.
>
> Thanks!
> Ronald
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Patch hide | download patch | download mbox

diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
index b360dcb..9b3e81a 100644
--- a/libavcodec/vaapi_vp9.c
+++ b/libavcodec/vaapi_vp9.c
@@ -38,6 +38,7 @@  static void fill_picture_parameters(AVCodecContext                 *avctx,
     pp->first_partition_size = h->h.compressed_header_size;
 
     pp->profile = h->h.profile;
+    pp->bit_depth = h->h.bpp;
 
     pp->filter_level = h->h.filter.level;
     pp->sharpness_level = h->h.filter.sharpness;
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 0ec895a..ff526da 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -68,7 +68,7 @@  typedef struct VP9Context {
     ptrdiff_t y_stride, uv_stride;
 
     uint8_t ss_h, ss_v;
-    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
+    uint8_t last_bpp, bpp_index, bytesperpixel;
     uint8_t last_keyframe;
     // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal
     // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads
@@ -258,7 +258,9 @@  static int update_size(AVCodecContext *ctx, int w, int h)
         if ((res = ff_set_dimensions(ctx, w, h)) < 0)
             return res;
 
-        if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
+        if (s->pix_fmt == AV_PIX_FMT_YUV420P ||
+            s->pix_fmt == AV_PIX_FMT_YUV420P10 ||
+            s->pix_fmt == AV_PIX_FMT_YUV420P12) {
 #if CONFIG_VP9_DXVA2_HWACCEL
             *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 #endif
@@ -326,10 +328,10 @@  static int update_size(AVCodecContext *ctx, int w, int h)
     av_freep(&s->b_base);
     av_freep(&s->block_base);
 
-    if (s->bpp != s->last_bpp) {
-        ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
-        ff_videodsp_init(&s->vdsp, s->bpp);
-        s->last_bpp = s->bpp;
+    if (s->s.h.bpp != s->last_bpp) {
+        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
+        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
+        s->last_bpp = s->s.h.bpp;
     }
 
     return 0;
@@ -458,8 +460,8 @@  static int read_colorspace_details(AVCodecContext *ctx)
     int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
 
     s->bpp_index = bits;
-    s->bpp = 8 + bits * 2;
-    s->bytesperpixel = (7 + s->bpp) >> 3;
+    s->s.h.bpp = 8 + bits * 2;
+    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
     ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
     if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
         static const enum AVPixelFormat pix_fmt_rgb[3] = {
@@ -571,7 +573,7 @@  static int decode_frame_header(AVCodecContext *ctx,
                     return res;
             } else {
                 s->ss_h = s->ss_v = 1;
-                s->bpp = 8;
+                s->s.h.bpp = 8;
                 s->bpp_index = 0;
                 s->bytesperpixel = 1;
                 s->pix_fmt = AV_PIX_FMT_YUV420P;
@@ -2278,7 +2280,7 @@  static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
                                  const int16_t (*nb)[2], const int16_t *band_counts,
                                  const int16_t *qmul)
 {
-    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
+    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, cnt, eob, p,
                                    nnz, scan, nb, band_counts, qmul);
 }
 
@@ -2288,7 +2290,7 @@  static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
                                    const int16_t (*nb)[2], const int16_t *band_counts,
                                    const int16_t *qmul)
 {
-    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
+    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, cnt, eob, p,
                                    nnz, scan, nb, band_counts, qmul);
 }
 
@@ -2479,7 +2481,7 @@  static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
     int have_top = row > 0 || y > 0;
     int have_left = col > s->tile_col_start || x > 0;
     int have_right = x < w - 1;
-    int bpp = s->bpp;
+    int bpp = s->s.h.bpp;
     static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
         [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED },
                                    { DC_127_PRED,          VERT_PRED } },
@@ -3310,13 +3312,13 @@  static void decode_b(AVCodecContext *ctx, int row, int col,
         s->uv_stride = f->linesize[1];
     }
     if (b->intra) {
-        if (s->bpp > 8) {
+        if (s->s.h.bpp > 8) {
             intra_recon_16bpp(ctx, yoff, uvoff);
         } else {
             intra_recon_8bpp(ctx, yoff, uvoff);
         }
     } else {
-        if (s->bpp > 8) {
+        if (s->s.h.bpp > 8) {
             inter_recon_16bpp(ctx);
         } else {
             inter_recon_8bpp(ctx);
@@ -4353,7 +4355,7 @@  static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
     s->gf_fmt = ssrc->gf_fmt;
     s->w = ssrc->w;
     s->h = ssrc->h;
-    s->bpp = ssrc->bpp;
+    s->s.h.bpp = ssrc->s.h.bpp;
     s->bpp_index = ssrc->bpp_index;
     s->pix_fmt = ssrc->pix_fmt;
     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h
index df5bd4d..89b1bd3 100644
--- a/libavcodec/vp9.h
+++ b/libavcodec/vp9.h
@@ -137,6 +137,7 @@  typedef struct VP9Frame {
 typedef struct VP9BitstreamHeader {
     // bitstream header
     uint8_t profile;
+    uint8_t bpp;
     uint8_t keyframe;
     uint8_t invisible;
     uint8_t errorres;