Message ID | 20161128212227.31911-1-matmaul@gmail.com |
---|---|
State | New |
Headers | show |
On 28/11/16 21:22, Mathieu Velten wrote: > --- > libavcodec/vaapi_vp9.c | 1 + > libavcodec/vp9.c | 32 +++++++++++++++++--------------- > libavcodec/vp9.h | 1 + > 3 files changed, 19 insertions(+), 15 deletions(-) Nice :) Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode). This should probably be split into two patches, though - one for the generic vp9 hwaccel support, a second then enabling it for VAAPI. > diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c > index b360dcb..9b3e81a 100644 > --- a/libavcodec/vaapi_vp9.c > +++ b/libavcodec/vaapi_vp9.c > @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext *avctx, > pp->first_partition_size = h->h.compressed_header_size; > > pp->profile = h->h.profile; > + pp->bit_depth = h->h.bpp; > > pp->filter_level = h->h.filter.level; > pp->sharpness_level = h->h.filter.sharpness; > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c > index 0ec895a..ff526da 100644 > --- a/libavcodec/vp9.c > +++ b/libavcodec/vp9.c > @@ -68,7 +68,7 @@ typedef struct VP9Context { > ptrdiff_t y_stride, uv_stride; > > uint8_t ss_h, ss_v; > - uint8_t last_bpp, bpp, bpp_index, bytesperpixel; > + uint8_t last_bpp, bpp_index, bytesperpixel; > uint8_t last_keyframe; > // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal > // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads > @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w, int h) > if ((res = ff_set_dimensions(ctx, w, h)) < 0) > return res; > > - if (s->pix_fmt == AV_PIX_FMT_YUV420P) { > + if (s->pix_fmt == AV_PIX_FMT_YUV420P || > + s->pix_fmt == AV_PIX_FMT_YUV420P10 || > + s->pix_fmt == AV_PIX_FMT_YUV420P12) { > #if CONFIG_VP9_DXVA2_HWACCEL > *fmtp++ = AV_PIX_FMT_DXVA2_VLD; > #endif This is enabling it for DXVA2 and D3D11VA as well? 
I'm guessing you probably didn't want to do that - I think it would be better with something more like <http://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavcodec/hevc.c;hb=HEAD#l350>. > @@ -326,10 +328,10 @@ static int update_size(AVCodecContext *ctx, int w, int h) > av_freep(&s->b_base); > av_freep(&s->block_base); > > - if (s->bpp != s->last_bpp) { > - ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT); > - ff_videodsp_init(&s->vdsp, s->bpp); > - s->last_bpp = s->bpp; > + if (s->s.h.bpp != s->last_bpp) { > + ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT); > + ff_videodsp_init(&s->vdsp, s->s.h.bpp); > + s->last_bpp = s->s.h.bpp; > } > > return 0; > @@ -458,8 +460,8 @@ static int read_colorspace_details(AVCodecContext *ctx) > int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12 > > s->bpp_index = bits; > - s->bpp = 8 + bits * 2; > - s->bytesperpixel = (7 + s->bpp) >> 3; > + s->s.h.bpp = 8 + bits * 2; > + s->bytesperpixel = (7 + s->s.h.bpp) >> 3; > ctx->colorspace = colorspaces[get_bits(&s->gb, 3)]; > if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1 > static const enum AVPixelFormat pix_fmt_rgb[3] = { > @@ -571,7 +573,7 @@ static int decode_frame_header(AVCodecContext *ctx, > return res; > } else { > s->ss_h = s->ss_v = 1; > - s->bpp = 8; > + s->s.h.bpp = 8; > s->bpp_index = 0; > s->bytesperpixel = 1; > s->pix_fmt = AV_PIX_FMT_YUV420P; > @@ -2278,7 +2280,7 @@ static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, > const int16_t (*nb)[2], const int16_t *band_counts, > const int16_t *qmul) > { > - return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p, > + return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, cnt, eob, p, > nnz, scan, nb, band_counts, qmul); > } > > @@ -2288,7 +2290,7 @@ static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, > const int16_t (*nb)[2], const int16_t *band_counts, > const 
int16_t *qmul) > { > - return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p, > + return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, cnt, eob, p, > nnz, scan, nb, band_counts, qmul); > } > > @@ -2479,7 +2481,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** > int have_top = row > 0 || y > 0; > int have_left = col > s->tile_col_start || x > 0; > int have_right = x < w - 1; > - int bpp = s->bpp; > + int bpp = s->s.h.bpp; > static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { > [VERT_PRED] = { { DC_127_PRED, VERT_PRED }, > { DC_127_PRED, VERT_PRED } }, > @@ -3310,13 +3312,13 @@ static void decode_b(AVCodecContext *ctx, int row, int col, > s->uv_stride = f->linesize[1]; > } > if (b->intra) { > - if (s->bpp > 8) { > + if (s->s.h.bpp > 8) { > intra_recon_16bpp(ctx, yoff, uvoff); > } else { > intra_recon_8bpp(ctx, yoff, uvoff); > } > } else { > - if (s->bpp > 8) { > + if (s->s.h.bpp > 8) { > inter_recon_16bpp(ctx); > } else { > inter_recon_8bpp(ctx); > @@ -4353,7 +4355,7 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo > s->gf_fmt = ssrc->gf_fmt; > s->w = ssrc->w; > s->h = ssrc->h; > - s->bpp = ssrc->bpp; > + s->s.h.bpp = ssrc->s.h.bpp; > s->bpp_index = ssrc->bpp_index; > s->pix_fmt = ssrc->pix_fmt; > memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx)); > diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h > index df5bd4d..89b1bd3 100644 > --- a/libavcodec/vp9.h > +++ b/libavcodec/vp9.h > @@ -137,6 +137,7 @@ typedef struct VP9Frame { > typedef struct VP9BitstreamHeader { > // bitstream header > uint8_t profile; > + uint8_t bpp; > uint8_t keyframe; > uint8_t invisible; > uint8_t errorres; > Everything else looks good to me (the making bpp available more generally), but someone else more familiar with this code should probably have a look too. Thanks, - Mark
Hi, On Mon, Nov 28, 2016 at 7:26 PM, Mark Thompson <sw@jkqxz.net> wrote: > On 28/11/16 21:22, Mathieu Velten wrote: > > --- > > libavcodec/vaapi_vp9.c | 1 + > > libavcodec/vp9.c | 32 +++++++++++++++++--------------- > > libavcodec/vp9.h | 1 + > > 3 files changed, 19 insertions(+), 15 deletions(-) > > Nice :) > > Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode). > > This should probably be split into two patches, though - one for the > generic vp9 hwaccel support, a second then enabling it for VAAPI. > > > diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c > > index b360dcb..9b3e81a 100644 > > --- a/libavcodec/vaapi_vp9.c > > +++ b/libavcodec/vaapi_vp9.c > > @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext > *avctx, > > pp->first_partition_size = h->h.compressed_header_size; > > > > pp->profile = h->h.profile; > > + pp->bit_depth = h->h.bpp; > > > > pp->filter_level = h->h.filter.level; > > pp->sharpness_level = h->h.filter.sharpness; > > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c > > index 0ec895a..ff526da 100644 > > --- a/libavcodec/vp9.c > > +++ b/libavcodec/vp9.c > > @@ -68,7 +68,7 @@ typedef struct VP9Context { > > ptrdiff_t y_stride, uv_stride; > > > > uint8_t ss_h, ss_v; > > - uint8_t last_bpp, bpp, bpp_index, bytesperpixel; > > + uint8_t last_bpp, bpp_index, bytesperpixel; > > uint8_t last_keyframe; > > // sb_cols/rows, rows/cols and last_fmt are used for allocating all > internal > > // arrays, and are thus per-thread. 
w/h and gf_fmt are synced > between threads > > @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w, > int h) > > if ((res = ff_set_dimensions(ctx, w, h)) < 0) > > return res; > > > > - if (s->pix_fmt == AV_PIX_FMT_YUV420P) { > > + if (s->pix_fmt == AV_PIX_FMT_YUV420P || > > + s->pix_fmt == AV_PIX_FMT_YUV420P10 || > > + s->pix_fmt == AV_PIX_FMT_YUV420P12) { > > #if CONFIG_VP9_DXVA2_HWACCEL > > *fmtp++ = AV_PIX_FMT_DXVA2_VLD; > > #endif > > This is enabling it for DXVA2 and D3D11VA as well? I'm guessing you > probably didn't want to do that - I think it would be better with something > more like <http://git.videolan.org/?p=ffmpeg.git;a=blob;f= > libavcodec/hevc.c;hb=HEAD#l350>. I'll let you guys figure out the details for this, but generic vp9.[ch] changes are OK with me. Thanks! Ronald
Thanks for your feedback. I'll indeed split that into 2 patches and only enable it for VAAPI. Mathieu Le mar. 29 nov. 2016 à 04:11, Ronald S. Bultje <rsbultje@gmail.com> a écrit : > Hi, > > On Mon, Nov 28, 2016 at 7:26 PM, Mark Thompson <sw@jkqxz.net> wrote: > > > On 28/11/16 21:22, Mathieu Velten wrote: > > > --- > > > libavcodec/vaapi_vp9.c | 1 + > > > libavcodec/vp9.c | 32 +++++++++++++++++--------------- > > > libavcodec/vp9.h | 1 + > > > 3 files changed, 19 insertions(+), 15 deletions(-) > > > > Nice :) > > > > Tested on Kaby Lake, works for me (woo 180fps 4K 10-bit decode). > > > > This should probably be split into two patches, though - one for the > > generic vp9 hwaccel support, a second then enabling it for VAAPI. > > > > > diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c > > > index b360dcb..9b3e81a 100644 > > > --- a/libavcodec/vaapi_vp9.c > > > +++ b/libavcodec/vaapi_vp9.c > > > @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext > > *avctx, > > > pp->first_partition_size = h->h.compressed_header_size; > > > > > > pp->profile = h->h.profile; > > > + pp->bit_depth = h->h.bpp; > > > > > > pp->filter_level = h->h.filter.level; > > > pp->sharpness_level = h->h.filter.sharpness; > > > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c > > > index 0ec895a..ff526da 100644 > > > --- a/libavcodec/vp9.c > > > +++ b/libavcodec/vp9.c > > > @@ -68,7 +68,7 @@ typedef struct VP9Context { > > > ptrdiff_t y_stride, uv_stride; > > > > > > uint8_t ss_h, ss_v; > > > - uint8_t last_bpp, bpp, bpp_index, bytesperpixel; > > > + uint8_t last_bpp, bpp_index, bytesperpixel; > > > uint8_t last_keyframe; > > > // sb_cols/rows, rows/cols and last_fmt are used for allocating > all > > internal > > > // arrays, and are thus per-thread. 
w/h and gf_fmt are synced > > between threads > > > @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w, > > int h) > > > if ((res = ff_set_dimensions(ctx, w, h)) < 0) > > > return res; > > > > > > - if (s->pix_fmt == AV_PIX_FMT_YUV420P) { > > > + if (s->pix_fmt == AV_PIX_FMT_YUV420P || > > > + s->pix_fmt == AV_PIX_FMT_YUV420P10 || > > > + s->pix_fmt == AV_PIX_FMT_YUV420P12) { > > > #if CONFIG_VP9_DXVA2_HWACCEL > > > *fmtp++ = AV_PIX_FMT_DXVA2_VLD; > > > #endif > > > > This is enabling it for DXVA2 and D3D11VA as well? I'm guessing you > > probably didn't want to do that - I think it would be better with > something > > more like <http://git.videolan.org/?p=ffmpeg.git;a=blob;f= > > libavcodec/hevc.c;hb=HEAD#l350>. > > > I'll let you guys figure out the details for this, but generic vp9.[ch] > changes are OK with me. > > Thanks! > Ronald > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c index b360dcb..9b3e81a 100644 --- a/libavcodec/vaapi_vp9.c +++ b/libavcodec/vaapi_vp9.c @@ -38,6 +38,7 @@ static void fill_picture_parameters(AVCodecContext *avctx, pp->first_partition_size = h->h.compressed_header_size; pp->profile = h->h.profile; + pp->bit_depth = h->h.bpp; pp->filter_level = h->h.filter.level; pp->sharpness_level = h->h.filter.sharpness; diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 0ec895a..ff526da 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -68,7 +68,7 @@ typedef struct VP9Context { ptrdiff_t y_stride, uv_stride; uint8_t ss_h, ss_v; - uint8_t last_bpp, bpp, bpp_index, bytesperpixel; + uint8_t last_bpp, bpp_index, bytesperpixel; uint8_t last_keyframe; // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads @@ -258,7 +258,9 @@ static int update_size(AVCodecContext *ctx, int w, int h) if ((res = ff_set_dimensions(ctx, w, h)) < 0) return res; - if (s->pix_fmt == AV_PIX_FMT_YUV420P) { + if (s->pix_fmt == AV_PIX_FMT_YUV420P || + s->pix_fmt == AV_PIX_FMT_YUV420P10 || + s->pix_fmt == AV_PIX_FMT_YUV420P12) { #if CONFIG_VP9_DXVA2_HWACCEL *fmtp++ = AV_PIX_FMT_DXVA2_VLD; #endif @@ -326,10 +328,10 @@ static int update_size(AVCodecContext *ctx, int w, int h) av_freep(&s->b_base); av_freep(&s->block_base); - if (s->bpp != s->last_bpp) { - ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT); - ff_videodsp_init(&s->vdsp, s->bpp); - s->last_bpp = s->bpp; + if (s->s.h.bpp != s->last_bpp) { + ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT); + ff_videodsp_init(&s->vdsp, s->s.h.bpp); + s->last_bpp = s->s.h.bpp; } return 0; @@ -458,8 +460,8 @@ static int read_colorspace_details(AVCodecContext *ctx) int bits = ctx->profile <= 1 ? 
0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12 s->bpp_index = bits; - s->bpp = 8 + bits * 2; - s->bytesperpixel = (7 + s->bpp) >> 3; + s->s.h.bpp = 8 + bits * 2; + s->bytesperpixel = (7 + s->s.h.bpp) >> 3; ctx->colorspace = colorspaces[get_bits(&s->gb, 3)]; if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1 static const enum AVPixelFormat pix_fmt_rgb[3] = { @@ -571,7 +573,7 @@ static int decode_frame_header(AVCodecContext *ctx, return res; } else { s->ss_h = s->ss_v = 1; - s->bpp = 8; + s->s.h.bpp = 8; s->bpp_index = 0; s->bytesperpixel = 1; s->pix_fmt = AV_PIX_FMT_YUV420P; @@ -2278,7 +2280,7 @@ static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, const int16_t (*nb)[2], const int16_t *band_counts, const int16_t *qmul) { - return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p, + return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, cnt, eob, p, nnz, scan, nb, band_counts, qmul); } @@ -2288,7 +2290,7 @@ static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, const int16_t (*nb)[2], const int16_t *band_counts, const int16_t *qmul) { - return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p, + return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, cnt, eob, p, nnz, scan, nb, band_counts, qmul); } @@ -2479,7 +2481,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** int have_top = row > 0 || y > 0; int have_left = col > s->tile_col_start || x > 0; int have_right = x < w - 1; - int bpp = s->bpp; + int bpp = s->s.h.bpp; static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { [VERT_PRED] = { { DC_127_PRED, VERT_PRED }, { DC_127_PRED, VERT_PRED } }, @@ -3310,13 +3312,13 @@ static void decode_b(AVCodecContext *ctx, int row, int col, s->uv_stride = f->linesize[1]; } if (b->intra) { - if (s->bpp > 8) { + if (s->s.h.bpp > 8) { intra_recon_16bpp(ctx, yoff, uvoff); } else { 
intra_recon_8bpp(ctx, yoff, uvoff); } } else { - if (s->bpp > 8) { + if (s->s.h.bpp > 8) { inter_recon_16bpp(ctx); } else { inter_recon_8bpp(ctx); @@ -4353,7 +4355,7 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo s->gf_fmt = ssrc->gf_fmt; s->w = ssrc->w; s->h = ssrc->h; - s->bpp = ssrc->bpp; + s->s.h.bpp = ssrc->s.h.bpp; s->bpp_index = ssrc->bpp_index; s->pix_fmt = ssrc->pix_fmt; memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx)); diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h index df5bd4d..89b1bd3 100644 --- a/libavcodec/vp9.h +++ b/libavcodec/vp9.h @@ -137,6 +137,7 @@ typedef struct VP9Frame { typedef struct VP9BitstreamHeader { // bitstream header uint8_t profile; + uint8_t bpp; uint8_t keyframe; uint8_t invisible; uint8_t errorres;