diff mbox

[FFmpeg-devel] lavc/vaapi: Add VP8 decode hwaccel

Message ID a1322a56-f124-d127-36e1-0ad12f4498dc@gmail.com
State New
Headers show

Commit Message

Jun Zhao Nov. 11, 2016, 7:57 a.m. UTC
From 4635e7e4a0ea24f77e71ffc9a9074e75c61bfe44 Mon Sep 17 00:00:00 2001
From: Jun Zhao <jun.zhao@intel.com>
Date: Fri, 11 Nov 2016 15:51:01 +0800
Subject: [PATCH] lavc/vaapi: Add VP8 decode hwaccel

Add VP8 decode hwaccel based on the libav:
commit a9fb134730da1f9642eb5a2baa50943b8a4aa245
    lavc/vaapi: Add VP8 decode hwaccel
commit 75d642a944d5579e4ef20ff3701422a64692afcf
    vaapi_vp8: Explicitly include libva vp8 decode header

Reviewed-by: Jun Zhao <jun.zhao@intel.com>
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

ase enter the commit message for your changes. Lines starting
---
 configure                   |   3 +
 libavcodec/Makefile         |   1 +
 libavcodec/allcodecs.c      |   1 +
 libavcodec/vaapi.c          |  15 ++++-
 libavcodec/vaapi.h          |   9 +++
 libavcodec/vaapi_internal.h |   3 +
 libavcodec/vp8.c            | 149 ++++++++++++++++++++++++++++++--------------
 libavcodec/vp8.h            |  29 ++++++++-
 8 files changed, 159 insertions(+), 51 deletions(-)

Comments

Timo Rothenpieler Nov. 11, 2016, 12:34 p.m. UTC | #1
This patch seems incomplete.
Did you forget to add the vaapi_vp8.c file before committing?

Also, adding a new field to the API which is deprecated from the start
seems odd.
Mark Thompson Nov. 12, 2016, 1:57 p.m. UTC | #2
On 11/11/16 07:57, Jun Zhao wrote:
> From 4635e7e4a0ea24f77e71ffc9a9074e75c61bfe44 Mon Sep 17 00:00:00 2001
> From: Jun Zhao <jun.zhao@intel.com>
> Date: Fri, 11 Nov 2016 15:51:01 +0800
> Subject: [PATCH] lavc/vaapi: Add VP8 decode hwaccel
>
> Add VP8 decode hwaccel based on the libav:
> commit a9fb134730da1f9642eb5a2baa50943b8a4aa245
>     lavc/vaapi: Add VP8 decode hwaccel
> commit 75d642a944d5579e4ef20ff3701422a64692afcf
>     vaapi_vp8: Explicitly include libva vp8 decode header
>
> Reviewed-by: Jun Zhao <jun.zhao@intel.com>
> Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
>
> ase enter the commit message for your changes. Lines starting
> ---
>  configure                   |   3 +
>  libavcodec/Makefile         |   1 +
>  libavcodec/allcodecs.c      |   1 +
>  libavcodec/vaapi.c          |  15 ++++-
>  libavcodec/vaapi.h          |   9 +++
>  libavcodec/vaapi_internal.h |   3 +
>  libavcodec/vp8.c            | 149 ++++++++++++++++++++++++++++++--------------
>  libavcodec/vp8.h            |  29 ++++++++-
>  8 files changed, 159 insertions(+), 51 deletions(-)

(You've omitted the file vaapi_vp8.c, so the patch isn't currently usable.)

The patches implementing this are already in the merge queue.  Other than the part noted below and the backport, is there any difference to the functionality?

I would generally prefer to preserve synchronisation with libav - the normal merge will also get the original dependencies rather than backporting to the pre-hwcontext infrastructure.


> @@ -2800,14 +2849,18 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst,
>          s->mb_width  = s_src->mb_width;
>          s->mb_height = s_src->mb_height;
>      }
> -
>      s->prob[0]      = s_src->prob[!s_src->update_probabilities];
>      s->segmentation = s_src->segmentation;
>      s->lf_delta     = s_src->lf_delta;
> +    s->pix_fmt = s_src->pix_fmt;
> +    s->mbskip_enabled = s_src->mbskip_enabled;
> +    s->filter = s_src->filter;
>      memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
> +    s->num_coeff_partitions = s_src->num_coeff_partitions;
> +    s->header_partition_size = s_src->header_partition_size;
>
>      for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
> -        if (s_src->frames[i].tf.f->data[0]) {
> +        if (s_src->frames[i].tf.f->buf[0]) {
>              int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
>              if (ret < 0)
>                  return ret;

This is fixing decode with frame threads?  I admit I don't think I ever tested with frame threading enabled, indeed it dies horribly in libav.

Does fate-vp8 using the hwaccel and threads pass in ffmpeg with this?  It fails in libav, but the setup might be different because of changes you've made.


Thanks,

- Mark
Mark Thompson Nov. 12, 2016, 4:24 p.m. UTC | #3
On 12/11/16 13:57, Mark Thompson wrote:
> On 11/11/16 07:57, Jun Zhao wrote:
>> @@ -2800,14 +2849,18 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst,
>>          s->mb_width  = s_src->mb_width;
>>          s->mb_height = s_src->mb_height;
>>      }
>> -
>>      s->prob[0]      = s_src->prob[!s_src->update_probabilities];
>>      s->segmentation = s_src->segmentation;
>>      s->lf_delta     = s_src->lf_delta;
>> +    s->pix_fmt = s_src->pix_fmt;
>> +    s->mbskip_enabled = s_src->mbskip_enabled;
>> +    s->filter = s_src->filter;
>>      memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
>> +    s->num_coeff_partitions = s_src->num_coeff_partitions;
>> +    s->header_partition_size = s_src->header_partition_size;
>>
>>      for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
>> -        if (s_src->frames[i].tf.f->data[0]) {
>> +        if (s_src->frames[i].tf.f->buf[0]) {
>>              int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
>>              if (ret < 0)
>>                  return ret;
> 
> This is fixing decode with frame threads?  I admit I don't think I ever tested with frame threading enabled, indeed it dies horribly in libav.
> 
> Does fate-vp8 using the hwaccel and threads pass in ffmpeg with this?  It fails in libav, but the setup might be different because of changes you've made.

Right, only pix_fmt and the references should be copied here (the other parts are always overwritten from the frame header).

More synchronisation is required to make it actually work - see <https://lists.libav.org/pipermail/libav-devel/2016-November/080413.html>.  Assuming other people agree with the approach there, it should probably be merged at the same time as 4e528206bc4d968706401206cf54471739250ec7.

Thanks,

- Mark
Jun Zhao Nov. 14, 2016, 12:30 a.m. UTC | #4
On 2016/11/11 20:34, Timo Rothenpieler wrote:
> This patch seems incomplete.
> Did you forget to add the vaapi_vp8.c file before committing?
> 

Yes, I forgot to add vaapi_vp8.c. Since Mark will merge the vaapi_vp8
hwaccel and the other vaapi infrastructure from Libav, I won't update this patch
until Mark updates the patches.

> Also, adding a new field to the API which is deprecated from the start
> seems odd.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Jun Zhao Nov. 14, 2016, 12:48 a.m. UTC | #5
On 2016/11/12 21:57, Mark Thompson wrote:
> On 11/11/16 07:57, Jun Zhao wrote:
>> From 4635e7e4a0ea24f77e71ffc9a9074e75c61bfe44 Mon Sep 17 00:00:00 2001
>> From: Jun Zhao <jun.zhao@intel.com>
>> Date: Fri, 11 Nov 2016 15:51:01 +0800
>> Subject: [PATCH] lavc/vaapi: Add VP8 decode hwaccel
>>
>> Add VP8 decode hwaccel based on the libav:
>> commit a9fb134730da1f9642eb5a2baa50943b8a4aa245
>>     lavc/vaapi: Add VP8 decode hwaccel
>> commit 75d642a944d5579e4ef20ff3701422a64692afcf
>>     vaapi_vp8: Explicitly include libva vp8 decode header
>>
>> Reviewed-by: Jun Zhao <jun.zhao@intel.com>
>> Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
>>
>> ase enter the commit message for your changes. Lines starting
>> ---
>>  configure                   |   3 +
>>  libavcodec/Makefile         |   1 +
>>  libavcodec/allcodecs.c      |   1 +
>>  libavcodec/vaapi.c          |  15 ++++-
>>  libavcodec/vaapi.h          |   9 +++
>>  libavcodec/vaapi_internal.h |   3 +
>>  libavcodec/vp8.c            | 149 ++++++++++++++++++++++++++++++--------------
>>  libavcodec/vp8.h            |  29 ++++++++-
>>  8 files changed, 159 insertions(+), 51 deletions(-)
> 
> (You've omitted the file vaapi_vp8.c, so the patch isn't currently usable.)
> 
> The patches implementing this are already in the merge queue.  Other than the part noted below and the backport, is there any difference to the functionality?
> 
> I would generally prefer to preserve synchronisation with libav - the normal merge will also get the original dependencies rather than backporting to the pre-hwcontext infrastructure.
> 
> 

Please go ahead with the sync with Libav and submit the merged patch; I won't update this patch
until you have submitted the merged patch. :)

>> @@ -2800,14 +2849,18 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst,
>>          s->mb_width  = s_src->mb_width;
>>          s->mb_height = s_src->mb_height;
>>      }
>> -
>>      s->prob[0]      = s_src->prob[!s_src->update_probabilities];
>>      s->segmentation = s_src->segmentation;
>>      s->lf_delta     = s_src->lf_delta;
>> +    s->pix_fmt = s_src->pix_fmt;
>> +    s->mbskip_enabled = s_src->mbskip_enabled;
>> +    s->filter = s_src->filter;
>>      memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
>> +    s->num_coeff_partitions = s_src->num_coeff_partitions;
>> +    s->header_partition_size = s_src->header_partition_size;
>>
>>      for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
>> -        if (s_src->frames[i].tf.f->data[0]) {
>> +        if (s_src->frames[i].tf.f->buf[0]) {
>>              int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
>>              if (ret < 0)
>>                  return ret;
> 
> This is fixing decode with frame threads?  I admit I don't think I ever tested with frame threading enabled, indeed it dies horribly in libav.
> 
> Does fate-vp8 using the hwaccel and threads pass in ffmpeg with this?  It fails in libav, but the setup might be different because of changes you've made.
> 

As far as I know, the vaapi hwaccel is not thread safe, so in the internal FATE test I disable
frame threading with the option "-threads 1 -thread_type frame+slice".

> 
> Thanks,
> 
> - Mark
>
diff mbox

Patch

diff --git a/configure b/configure
index 87b06f1..7b6a1c4 100755
--- a/configure
+++ b/configure
@@ -2668,6 +2668,8 @@  vp8_cuvid_hwaccel_deps="cuda cuvid"
 vp9_cuvid_hwaccel_deps="cuda cuvid"
 vp8_mediacodec_decoder_deps="mediacodec"
 vp8_mediacodec_hwaccel_deps="mediacodec"
+vp8_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVP8"
+vp8_vaapi_hwaccel_select="vp8_decoder"
 vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9"
 vp9_d3d11va_hwaccel_select="vp9_decoder"
 vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
@@ -5588,6 +5590,7 @@  check_type "va/va.h va/va_vpp.h" "VAProcPipelineParameterBuffer"
 check_type "va/va.h va/va_enc_h264.h" "VAEncPictureParameterBufferH264"
 check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC"
 check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
+check_type "va/va.h va/va_dec_vp8.h" "VAPictureParameterBufferVP8"
 
 check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC"
 
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 5fdc97f..502872c 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -804,6 +804,7 @@  OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
 OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)        += dxva2_vp9.o
 OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)          += dxva2_vp9.o
 OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
+OBJS-$(CONFIG_VP8_VAAPI_HWACCEL)          += vaapi_vp8.o
 
 # libavformat dependencies
 OBJS-$(CONFIG_ISO_MEDIA)               += mpeg4audio.o mpegaudiodata.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index ada9481..64a7ccd 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -125,6 +125,7 @@  void avcodec_register_all(void)
     REGISTER_HWACCEL(WMV3_DXVA2,        wmv3_dxva2);
     REGISTER_HWACCEL(WMV3_VAAPI,        wmv3_vaapi);
     REGISTER_HWACCEL(WMV3_VDPAU,        wmv3_vdpau);
+    REGISTER_HWACCEL(VP8_VAAPI,         vp8_vaapi);
 
     /* video codecs */
     REGISTER_ENCODER(A64MULTI,          a64multi);
diff --git a/libavcodec/vaapi.c b/libavcodec/vaapi.c
index 36db640..e440e8f 100644
--- a/libavcodec/vaapi.c
+++ b/libavcodec/vaapi.c
@@ -59,6 +59,7 @@  int ff_vaapi_context_init(AVCodecContext *avctx)
     vactx->pic_param_buf_id     = VA_INVALID_ID;
     vactx->iq_matrix_buf_id     = VA_INVALID_ID;
     vactx->bitplane_buf_id      = VA_INVALID_ID;
+    vactx->prob_buf_id          = VA_INVALID_ID;
 
     return 0;
 }
@@ -70,7 +71,7 @@  int ff_vaapi_context_fini(AVCodecContext *avctx)
 
 int ff_vaapi_render_picture(FFVAContext *vactx, VASurfaceID surface)
 {
-    VABufferID va_buffers[3];
+    VABufferID va_buffers[4];
     unsigned int n_va_buffers = 0;
 
     if (vactx->pic_param_buf_id == VA_INVALID_ID)
@@ -89,6 +90,11 @@  int ff_vaapi_render_picture(FFVAContext *vactx, VASurfaceID surface)
         va_buffers[n_va_buffers++] = vactx->bitplane_buf_id;
     }
 
+    if (vactx->prob_buf_id != VA_INVALID_ID) {
+        vaUnmapBuffer(vactx->display, vactx->prob_buf_id);
+        va_buffers[n_va_buffers++] = vactx->prob_buf_id;
+    }
+
     if (vaBeginPicture(vactx->display, vactx->context_id,
                        surface) != VA_STATUS_SUCCESS)
         return -1;
@@ -175,6 +181,11 @@  uint8_t *ff_vaapi_alloc_bitplane(FFVAContext *vactx, uint32_t size)
     return alloc_buffer(vactx, VABitPlaneBufferType, size, &vactx->bitplane_buf_id);
 }
 
+uint8_t *ff_vaapi_alloc_prob_buffer(FFVAContext *vactx, uint32_t size)
+{
+    return alloc_buffer(vactx, VAProbabilityBufferType, size, &vactx->prob_buf_id);
+}
+
 VASliceParameterBufferBase *ff_vaapi_alloc_slice(FFVAContext *vactx, const uint8_t *buffer, uint32_t size)
 {
     uint8_t *slice_params;
@@ -209,10 +220,10 @@  VASliceParameterBufferBase *ff_vaapi_alloc_slice(FFVAContext *vactx, const uint8
 void ff_vaapi_common_end_frame(AVCodecContext *avctx)
 {
     FFVAContext * const vactx = ff_vaapi_get_context(avctx);
-
     destroy_buffers(vactx->display, &vactx->pic_param_buf_id, 1);
     destroy_buffers(vactx->display, &vactx->iq_matrix_buf_id, 1);
     destroy_buffers(vactx->display, &vactx->bitplane_buf_id, 1);
+    destroy_buffers(vactx->display, &vactx->prob_buf_id, 1);
     destroy_buffers(vactx->display, vactx->slice_buf_ids, vactx->n_slice_buf_ids);
     av_freep(&vactx->slice_buf_ids);
     av_freep(&vactx->slice_params);
diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h
index 7a29f6f..4c86538 100644
--- a/libavcodec/vaapi.h
+++ b/libavcodec/vaapi.h
@@ -103,6 +103,15 @@  struct vaapi_context {
     uint32_t bitplane_buf_id;
 
     /**
+     * VAProbabilityBuffer ID (for VP-8 decoding)
+     *
+     * - encoding: unused
+     * - decoding: Set by libavcodec
+     */
+    attribute_deprecated
+    uint32_t prob_buf_id;
+
+    /**
      * Slice parameter/data buffer IDs
      *
      * - encoding: unused
diff --git a/libavcodec/vaapi_internal.h b/libavcodec/vaapi_internal.h
index 306ae13..76cc6b0 100644
--- a/libavcodec/vaapi_internal.h
+++ b/libavcodec/vaapi_internal.h
@@ -42,6 +42,7 @@  typedef struct {
     VABufferID pic_param_buf_id;        ///< Picture parameter buffer
     VABufferID iq_matrix_buf_id;        ///< Inverse quantiser matrix buffer
     VABufferID bitplane_buf_id;         ///< Bitplane buffer (for VC-1 decoding)
+    VABufferID prob_buf_id;             ///< Prob buffer (for VP-8 decoding)
     VABufferID *slice_buf_ids;          ///< Slice parameter/data buffers
     unsigned int n_slice_buf_ids;       ///< Number of effective slice buffers
     unsigned int slice_buf_ids_alloc;   ///< Number of allocated slice buffers
@@ -83,6 +84,8 @@  void *ff_vaapi_alloc_iq_matrix(FFVAContext *vactx, unsigned int size);
 /** Allocate a new bit-plane buffer */
 uint8_t *ff_vaapi_alloc_bitplane(FFVAContext *vactx, uint32_t size);
 
+/** Allocate a new prob buffer */
+uint8_t *ff_vaapi_alloc_prob_buffer(FFVAContext *vactx, uint32_t size);
 /**
  * Allocate a new slice descriptor for the input slice.
  *
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index c1c3eb7..fe53806 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -140,7 +140,7 @@  static VP8Frame *vp8_find_free_buffer(VP8Context *s)
         av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
         abort();
     }
-    if (frame->tf.f->data[0])
+    if (frame->tf.f->buf[0])
         vp8_release_frame(s, frame);
 
     return frame;
@@ -218,8 +218,9 @@  static void parse_segment_info(VP8Context *s)
     int i;
 
     s->segmentation.update_map = vp8_rac_get(c);
+    s->segmentation.update_feature_data = vp8_rac_get(c);
 
-    if (vp8_rac_get(c)) { // update segment feature data
+    if (s->segmentation.update_feature_data) { // update segment feature data
         s->segmentation.absolute_vals = vp8_rac_get(c);
 
         for (i = 0; i < 4; i++)
@@ -273,11 +274,14 @@  static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
         int size = AV_RL24(sizes + 3 * i);
         if (buf_size - size < 0)
             return -1;
+        s->coeff_partition_size[i] = size;
 
         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
         buf      += size;
         buf_size -= size;
     }
+
+    s->coeff_partition_size[i] = buf_size;
     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
 
     return 0;
@@ -333,6 +337,12 @@  static void vp8_get_quants(VP8Context *s)
         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
     }
+    s->quant.yac_qi = yac_qi;
+    s->quant.ydc_delta = ydc_delta;
+    s->quant.y2dc_delta = y2dc_delta;
+    s->quant.y2ac_delta = y2ac_delta;
+    s->quant.uvdc_delta = uvdc_delta;
+    s->quant.uvac_delta = uvac_delta;
 }
 
 /**
@@ -656,6 +666,7 @@  static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
     buf      += 3;
     buf_size -= 3;
 
+    s->header_partition_size = header_size;
     if (s->profile > 3)
         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
 
@@ -719,9 +730,11 @@  static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
     s->filter.level     = vp8_rac_get_uint(c, 6);
     s->filter.sharpness = vp8_rac_get_uint(c, 3);
 
-    if ((s->lf_delta.enabled = vp8_rac_get(c)))
-        if (vp8_rac_get(c))
+    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
+        s->lf_delta.update = vp8_rac_get(c);
+        if (s->lf_delta.update)
             update_lf_deltas(s);
+    }
 
     if (setup_partitions(s, buf, buf_size)) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
@@ -761,6 +774,12 @@  static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
     }
 
+    s->c.code_word = vp56_rac_renorm(&s->c);
+    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
+    s->coder_state_at_header_end.range     = s->c.high;
+    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
+    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
+
     return 0;
 }
 
@@ -2538,6 +2557,22 @@  int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     if (ret < 0)
         goto err;
+    if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
+        enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_VP8_VAAPI_HWACCEL
+            AV_PIX_FMT_VAAPI,
+#endif
+            AV_PIX_FMT_YUV420P,
+            AV_PIX_FMT_NONE,
+        };
+
+        s->pix_fmt = ff_get_format(s->avctx, pix_fmts);
+        if (s->pix_fmt < 0) {
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+        avctx->pix_fmt = s->pix_fmt;
+    }
 
     prev_frame = s->framep[VP56_FRAME_CURRENT];
 
@@ -2557,7 +2592,7 @@  int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     // release no longer referenced frames
     for (i = 0; i < 5; i++)
-        if (s->frames[i].tf.f->data[0] &&
+        if (s->frames[i].tf.f->buf[0] &&
             &s->frames[i] != prev_frame &&
             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
@@ -2613,52 +2648,65 @@  int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (avctx->codec->update_thread_context)
         ff_thread_finish_setup(avctx);
 
-    s->linesize   = curframe->tf.f->linesize[0];
-    s->uvlinesize = curframe->tf.f->linesize[1];
-
-    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
-    /* Zero macroblock structures for top/top-left prediction
-     * from outside the frame. */
-    if (!s->mb_layout)
-        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
-               (s->mb_width + 1) * sizeof(*s->macroblocks));
-    if (!s->mb_layout && s->keyframe)
-        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
+    if (avctx->hwaccel) {
+        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
+        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
+        ret = avctx->hwaccel->end_frame(avctx);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR, "end_frame error");
+            goto err;
+        }
+    } else {
+        s->linesize   = curframe->tf.f->linesize[0];
+        s->uvlinesize = curframe->tf.f->linesize[1];
 
-    memset(s->ref_count, 0, sizeof(s->ref_count));
+        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
+        /* Zero macroblock structures for top/top-left prediction
+         * from outside the frame. */
+        if (!s->mb_layout)
+            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
+                (s->mb_width + 1) * sizeof(*s->macroblocks));
+        if (!s->mb_layout && s->keyframe)
+            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
+
+        memset(s->ref_count, 0, sizeof(s->ref_count));
+
+        if (s->mb_layout == 1) {
+            // Make sure the previous frame has read its segmentation map,
+            // if we re-use the same map.
+            if (prev_frame && s->segmentation.enabled &&
+                !s->segmentation.update_map)
+                ff_thread_await_progress(&prev_frame->tf, 1, 0);
+            if (is_vp7)
+                vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
+            else
+                vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
+        }
 
-    if (s->mb_layout == 1) {
-        // Make sure the previous frame has read its segmentation map,
-        // if we re-use the same map.
-        if (prev_frame && s->segmentation.enabled &&
-            !s->segmentation.update_map)
-            ff_thread_await_progress(&prev_frame->tf, 1, 0);
-        if (is_vp7)
-            vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
+        if (avctx->active_thread_type == FF_THREAD_FRAME)
+            num_jobs = 1;
         else
-            vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
-    }
-
-    if (avctx->active_thread_type == FF_THREAD_FRAME)
-        num_jobs = 1;
-    else
-        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
-    s->num_jobs   = num_jobs;
-    s->curframe   = curframe;
-    s->prev_frame = prev_frame;
-    s->mv_min.y   = -MARGIN;
-    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
-    for (i = 0; i < MAX_THREADS; i++) {
-        s->thread_data[i].thread_mb_pos = 0;
-        s->thread_data[i].wait_mb_pos   = INT_MAX;
-    }
-    if (is_vp7)
-        avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
+            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
+        s->num_jobs   = num_jobs;
+        s->curframe   = curframe;
+        s->prev_frame = prev_frame;
+        s->mv_min.y   = -MARGIN;
+        s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
+        for (i = 0; i < MAX_THREADS; i++) {
+            s->thread_data[i].thread_mb_pos = 0;
+            s->thread_data[i].wait_mb_pos   = INT_MAX;
+        }
+        if (is_vp7)
+            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                         num_jobs);
-    else
-        avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
+        else
+            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                         num_jobs);
-
+    }
     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
 
@@ -2727,6 +2775,7 @@  int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
     int ret;
 
     s->avctx = avctx;
+    s->pix_fmt = AV_PIX_FMT_NONE;
     s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->internal->allocate_progress = 1;
@@ -2800,14 +2849,18 @@  static int vp8_decode_update_thread_context(AVCodecContext *dst,
         s->mb_width  = s_src->mb_width;
         s->mb_height = s_src->mb_height;
     }
-
     s->prob[0]      = s_src->prob[!s_src->update_probabilities];
     s->segmentation = s_src->segmentation;
     s->lf_delta     = s_src->lf_delta;
+    s->pix_fmt = s_src->pix_fmt;
+    s->mbskip_enabled = s_src->mbskip_enabled;
+    s->filter = s_src->filter;
     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
+    s->num_coeff_partitions = s_src->num_coeff_partitions;
+    s->header_partition_size = s_src->header_partition_size;
 
     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
-        if (s_src->frames[i].tf.f->data[0]) {
+        if (s_src->frames[i].tf.f->buf[0]) {
             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
             if (ret < 0)
                 return ret;
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 374e138..af7285f 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -136,6 +136,8 @@  typedef struct VP8intmv {
 typedef struct VP8Context {
     VP8ThreadData *thread_data;
     AVCodecContext *avctx;
+    enum AVPixelFormat pix_fmt;
+
     VP8Frame *framep[4];
     VP8Frame *next_framep[4];
     VP8Frame *curframe;
@@ -165,6 +167,7 @@  typedef struct VP8Context {
         uint8_t enabled;
         uint8_t absolute_vals;
         uint8_t update_map;
+        uint8_t update_feature_data;
         int8_t base_quant[4];
         int8_t filter_level[4];     ///< base loop filter level
     } segmentation;
@@ -192,9 +195,19 @@  typedef struct VP8Context {
         int16_t chroma_qmul[2];
     } qmat[4];
 
+    // Raw quantisation values, which may be needed by hwaccel decode.
     struct {
-        uint8_t enabled;    ///< whether each mb can have a different strength based on mode/ref
+        int yac_qi;
+        int ydc_delta;
+        int y2dc_delta;
+        int y2ac_delta;
+        int uvdc_delta;
+        int uvac_delta;
+    } quant;
 
+    struct {
+        uint8_t enabled;    ///< whether each mb can have a different strength based on mode/ref
+        uint8_t update;
         /**
          * filter strength adjustment for the following macroblock modes:
          * [0-3] - i16x16 (always zero)
@@ -221,6 +234,19 @@  typedef struct VP8Context {
 
     VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
 
+    /* This contains the entropy coder state at the end of the header
+     * block, in the form specified by the standard.  For use by
+     * hwaccels, so that a hardware decoder has the information to
+     * start decoding at the macroblock layer.
+    */
+    struct {
+        const uint8_t *input;
+        uint32_t range;
+        uint32_t value;
+        int bit_count;
+    } coder_state_at_header_end;
+
+    int header_partition_size;
     /**
      * These are all of the updatable probabilities for binary decisions.
      * They are only implicitly reset on keyframes, making it quite likely
@@ -258,6 +284,7 @@  typedef struct VP8Context {
      */
     int num_coeff_partitions;
     VP56RangeCoder coeff_partition[8];
+    int coeff_partition_size[8];
     VideoDSPContext vdsp;
     VP8DSPContext vp8dsp;
     H264PredContext hpc;