diff mbox series

[FFmpeg-devel,1/2] avcodec: add av1 hardware acceleration decoder

Message ID 20200821052950.18785-1-fei.w.wang@intel.com
State New
Headers show
Series [FFmpeg-devel,1/2] avcodec: add av1 hardware acceleration decoder | expand

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Fei Wang Aug. 21, 2020, 5:29 a.m. UTC
This AV1 decoder is currently used only for AV1 hardware-accelerated
decoding. Considering that it can be extended to a native decoder like
hevc or vp9 in the future, its name is defined as "av1" and it is
placed in the external libraries codec list.

Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
 Changelog              |   1 +
 configure              |   1 +
 libavcodec/Makefile    |   1 +
 libavcodec/allcodecs.c |   1 +
 libavcodec/av1dec.c    | 746 +++++++++++++++++++++++++++++++++++++++++
 libavcodec/av1dec.h    |  89 +++++
 libavcodec/version.h   |   2 +-
 7 files changed, 840 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/av1dec.c
 create mode 100644 libavcodec/av1dec.h

Comments

James Almer Aug. 21, 2020, 2:42 p.m. UTC | #1
On 8/21/2020 2:29 AM, Fei Wang wrote:
> This av1 decoder is now only used for av1 hardware acceleration
> decoder. Consider it can be extend to a local decoder like hevc
> or vp9 in the future, so define its name as "av1" and put it into
> external libraries codec list.
> 
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
>  Changelog              |   1 +
>  configure              |   1 +
>  libavcodec/Makefile    |   1 +
>  libavcodec/allcodecs.c |   1 +
>  libavcodec/av1dec.c    | 746 +++++++++++++++++++++++++++++++++++++++++
>  libavcodec/av1dec.h    |  89 +++++
>  libavcodec/version.h   |   2 +-
>  7 files changed, 840 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/av1dec.c
>  create mode 100644 libavcodec/av1dec.h
> 
> diff --git a/Changelog b/Changelog
> index 1efc768387..3ff88cc12f 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -14,6 +14,7 @@ version <next>:
>  - ADPCM Argonaut Games encoder
>  - Argonaut Games ASF muxer
>  - AV1 Low overhead bitstream format demuxer
> +- AV1 decoder (Hardware acceleration used only)
>  
>  
>  version 4.3:
> diff --git a/configure b/configure
> index 6faff9bc7b..ef71e47c4e 100755
> --- a/configure
> +++ b/configure
> @@ -2685,6 +2685,7 @@ atrac3al_decoder_select="mdct"
>  atrac3p_decoder_select="mdct sinewin"
>  atrac3pal_decoder_select="mdct sinewin"
>  atrac9_decoder_select="mdct"
> +av1_decoder_select="cbs_av1"
>  avrn_decoder_select="exif jpegtables"
>  bink_decoder_select="blockdsp hpeldsp"
>  binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3431ba2dca..2371039bd2 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -764,6 +764,7 @@ OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
>  OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
>  OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
>  OBJS-$(CONFIG_ZMBV_ENCODER)            += zmbvenc.o
> +OBJS-$(CONFIG_AV1_DECODER)             += av1dec.o
>  
>  # (AD)PCM decoders/encoders
>  OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 4bd830e5d0..00799d3431 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -761,6 +761,7 @@ extern AVCodec ff_idf_decoder;
>   * above is available */
>  extern AVCodec ff_aac_mf_encoder;
>  extern AVCodec ff_ac3_mf_encoder;
> +extern AVCodec ff_av1_decoder;
>  extern AVCodec ff_h263_v4l2m2m_encoder;
>  extern AVCodec ff_libaom_av1_decoder;
>  extern AVCodec ff_libopenh264_encoder;
> diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
> new file mode 100644
> index 0000000000..ab88ed987a
> --- /dev/null
> +++ b/libavcodec/av1dec.c
> @@ -0,0 +1,746 @@
> +/*
> + * AV1video decoder
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avcodec.h"
> +#include "get_bits.h"
> +#include "hwconfig.h"
> +#include "internal.h"
> +#include "profiles.h"
> +#include "thread.h"
> +#include "videodsp.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "av1dec.h"
> +#include "libavformat/av1.h"
> +
> +static void setup_past_independence(AV1Frame *f)
> +{
> +    f->loop_filter_delta_enabled = 1;
> +
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_INTRA] = 1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST2] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST3] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN] = -1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2] = -1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF] = -1;
> +
> +    f->loop_filter_mode_deltas[0] = 0;
> +    f->loop_filter_mode_deltas[1] = 0;
> +}
> +
> +static void load_previous_and_update(AV1DecContext *s)
> +{
> +    int i;
> +
> +    memcpy(s->cur_frame.loop_filter_ref_deltas,
> +           s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +    memcpy(s->cur_frame.loop_filter_mode_deltas,
> +           s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_mode_deltas,
> +           2 * sizeof(int8_t));
> +
> +    if (s->raw_frame_header.loop_filter_delta_update) {
> +        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
> +            if (s->raw_frame_header.update_ref_delta[i])
> +                s->cur_frame.loop_filter_ref_deltas[i] =
> +                    s->raw_frame_header.loop_filter_ref_deltas[i];
> +        }
> +
> +        for (i = 0; i < 2; i++) {
> +            if (s->raw_frame_header.update_mode_delta[i])
> +                s->cur_frame.loop_filter_mode_deltas[i] =
> +                    s->raw_frame_header.loop_filter_mode_deltas[i];
> +        }
> +    }
> +
> +    s->cur_frame.loop_filter_delta_enabled =
> +        s->raw_frame_header.loop_filter_delta_enabled;
> +}
> +
> +static uint32_t inverse_recenter(int r, uint32_t v)
> +{
> +    if (v > 2 * r)
> +        return v;
> +    else if (v & 1)
> +        return r - ((v + 1) >> 1);
> +    else
> +        return r + (v >> 1);
> +}
> +
> +static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
> +                                                int mx, int r)
> +{
> +    if ((r << 1) <= mx) {
> +        return inverse_recenter(r, sub_exp);
> +    } else {
> +        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
> +    }
> +}
> +
> +static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
> +                                             int high, int r)
> +{
> +    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r - low);
> +    return x + low;
> +}
> +
> +static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
> +{
> +    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx, r;
> +    abs_bits = AV1_GM_ABS_ALPHA_BITS;
> +    prec_bits = AV1_GM_ALPHA_PREC_BITS;
> +
> +    if (idx < 2) {
> +        if (type == AV1_WARP_MODEL_TRANSLATION) {
> +            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
> +                !s->raw_frame_header.allow_high_precision_mv;
> +            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
> +                !s->raw_frame_header.allow_high_precision_mv;
> +        } else {
> +            abs_bits = AV1_GM_ABS_TRANS_BITS;
> +            prec_bits = AV1_GM_TRANS_PREC_BITS;
> +        }
> +    }
> +    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
> +    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
> +    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
> +    mx = 1 << abs_bits;
> +    r = (s->ref[s->raw_frame_header.primary_ref_frame].gm_params[ref][idx]
> +         >> prec_diff) - sub;
> +
> +    s->cur_frame.gm_params[ref][idx] =
> +        (decode_signed_subexp_with_ref(s->raw_frame_header.gm_params[ref][idx],
> +                                       -mx, mx + 1, r) << prec_diff) + round;
> +}
> +
> +/**
> +* update gm type/params, since cbs already implemented part of this funcation,
> +* so we don't need to full implement spec.

FYI, the gm params code in cbs_av1 is currently buggy for some streams,
and it needs fixing.

> +*/
> +static void global_motion_params(AV1DecContext *s)
> +{
> +    const AV1RawFrameHeader *header = &s->raw_frame_header;
> +    int type;
> +    int i, j;
> +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +        s->cur_frame.gm_type[i] = AV1_WARP_MODEL_IDENTITY;
> +        for (j = 0; j <= 5; j++)
> +            s->cur_frame.gm_params[i][j] = (j % 3 == 2) ?
> +                                           1 << AV1_WARPEDMODEL_PREC_BITS : 0;
> +    }
> +    if (header->frame_type == AV1_FRAME_KEY ||
> +        header->frame_type == AV1_FRAME_INTRA_ONLY)
> +        return;
> +
> +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +        if (header->is_global[i]) {
> +            if (header->is_rot_zoom[i]) {
> +                type = AV1_WARP_MODEL_ROTZOOM;
> +            } else {
> +                type = header->is_translation[i] ? AV1_WARP_MODEL_TRANSLATION
> +                                                 : AV1_WARP_MODEL_AFFINE;
> +            }
> +        } else {
> +            type = AV1_WARP_MODEL_IDENTITY;
> +        }
> +        s->cur_frame.gm_type[i] = type;
> +
> +        if (type >= AV1_WARP_MODEL_ROTZOOM) {
> +            read_global_param(s, type, i, 2);
> +            read_global_param(s, type, i, 3);
> +            if (type == AV1_WARP_MODEL_AFFINE) {
> +                read_global_param(s, type, i, 4);
> +                read_global_param(s, type, i, 5);
> +            } else {
> +                s->cur_frame.gm_params[i][4] = - s->cur_frame.gm_params[i][3];
> +                s->cur_frame.gm_params[i][5] = - s->cur_frame.gm_params[i][2];
> +            }
> +        }
> +        if (type >= AV1_WARP_MODEL_TRANSLATION) {
> +            read_global_param(s, type, i, 0);
> +            read_global_param(s, type, i, 1);
> +        }
> +    }
> +}
> +
> +static int get_tiles_info(AVCodecContext *avctx,
> +                          AV1RawTileGroup *tile_group,
> +                          uint32_t tile_group_offset)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    GetBitContext gb;
> +    uint16_t tile_num, tile_row, tile_col;
> +    uint16_t mi_cols, mi_rows, sb_cols, sb_rows, tile_width_sb, tile_height_sb;
> +    uint32_t size = 0, size_bytes = 0, offset = 0;
> +    int ret = 0;
> +
> +    if ((ret = init_get_bits8(&gb,
> +                              tile_group->tile_data.data,
> +                              tile_group->tile_data.data_size)) < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to initialize bitstream reader.\n");
> +        return ret;
> +    }
> +
> +    s->tg_start = tile_group->tg_start;
> +    s->tg_end = tile_group->tg_end;
> +
> +    if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +        mi_cols = 2 * ((s->raw_seq.max_frame_width_minus_1 + 8) >> 3);
> +        mi_rows = 2 * ((s->raw_seq.max_frame_height_minus_1 + 8) >> 3);
> +        sb_cols = s->raw_seq.use_128x128_superblock ? ((mi_cols + 31) >> 5)
> +                                                    : ((mi_cols + 15) >> 4);
> +        sb_rows = s->raw_seq.use_128x128_superblock ? ((mi_rows + 31) >> 5)
> +                                                    : ((mi_rows + 15) >> 4);
> +        tile_width_sb = (sb_cols + (1 << s->raw_frame_header.tile_cols_log2)
> +                         -1) >> s->raw_frame_header.tile_cols_log2;
> +        tile_height_sb = (sb_rows + (1 << s->raw_frame_header.tile_rows_log2)
> +                         -1) >> s->raw_frame_header.tile_rows_log2;
> +    }
> +
> +    for (tile_num = tile_group->tg_start; tile_num <= tile_group->tg_end; tile_num++) {
> +        tile_row = tile_num / s->raw_frame_header.tile_cols;
> +        tile_col = tile_num % s->raw_frame_header.tile_cols;
> +
> +        if (tile_num == tile_group->tg_end) {
> +            s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> +            s->tile_group_info[tile_num].tile_size = get_bits_left(&gb) / 8;
> +            s->tile_group_info[tile_num].tile_offset = offset;
> +            s->tile_group_info[tile_num].tile_row = tile_row;
> +            s->tile_group_info[tile_num].tile_column = tile_col;
> +            s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> +            if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +                s->tile_group_info[tile_num].tile_width_sbs =
> +                    (tile_col + 1) == s->raw_frame_header.tile_cols ?
> +                    sb_cols - tile_col * tile_width_sb :
> +                    tile_width_sb;
> +                s->tile_group_info[tile_num].tile_height_sbs =
> +                    (tile_row + 1) == s->raw_frame_header.tile_rows ?
> +                    sb_rows - tile_row * tile_height_sb :
> +                    tile_height_sb;
> +            } else {
> +                s->tile_group_info[tile_num].tile_width_sbs =
> +                    s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> +                s->tile_group_info[tile_num].tile_height_sbs =
> +                    s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> +            }
> +            return 0;
> +        }
> +        size_bytes = s->raw_frame_header.tile_size_bytes_minus1 + 1;
> +        size = get_bits_le(&gb, size_bytes * 8) + 1;
> +        skip_bits(&gb, size * 8);
> +
> +        offset += size_bytes;
> +
> +        s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> +        s->tile_group_info[tile_num].tile_size = size;
> +        s->tile_group_info[tile_num].tile_offset = offset;
> +        s->tile_group_info[tile_num].tile_row = tile_row;
> +        s->tile_group_info[tile_num].tile_column = tile_col;
> +        s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> +        if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +            s->tile_group_info[tile_num].tile_width_sbs =
> +                (tile_col + 1) == s->raw_frame_header.tile_cols ?
> +                sb_cols - tile_col * tile_width_sb :
> +                tile_width_sb;
> +            s->tile_group_info[tile_num].tile_height_sbs =
> +                (tile_row + 1) == s->raw_frame_header.tile_rows ?
> +                sb_rows - tile_row * tile_height_sb :
> +                tile_height_sb;
> +        } else {
> +            s->tile_group_info[tile_num].tile_width_sbs =
> +                s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> +            s->tile_group_info[tile_num].tile_height_sbs =
> +                s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> +        }
> +        offset += size;
> +    }
> +
> +    return 0;
> +
> +}
> +
> +static int get_pixel_format(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    const AV1RawSequenceHeader *seq = &s->raw_seq;
> +    uint8_t bit_depth;
> +    int ret = 0;
> +    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
> +#define HWACCEL_MAX (0)
> +    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
> +
> +    if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
> +        bit_depth = seq->color_config.twelve_bit ? 12 : 10;
> +    else if (seq->seq_profile <= 2)
> +        bit_depth = seq->color_config.high_bitdepth ? 10 : 8;
> +    else {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "Unknow av1 profile %d.\n", seq->seq_profile);
> +        return -1;
> +    }
> +
> +    if (!seq->color_config.mono_chrome) {
> +        // 4:4:4 x:0 y:0, 4:2:2 x:1 y:0, 4:2:0 x:1 y:1
> +        if (seq->color_config.subsampling_x == 0 &&
> +            seq->color_config.subsampling_y == 0) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV444P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV444P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV444P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +        } else if (seq->color_config.subsampling_x == 1 &&
> +                   seq->color_config.subsampling_y == 0) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV422P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV422P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV422P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +        } else if (seq->color_config.subsampling_x == 1 &&
> +                   seq->color_config.subsampling_y == 1) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV420P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV420P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV420P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +        }
> +    } else {
> +        if (seq->color_config.subsampling_x == 1 &&
> +            seq->color_config.subsampling_y == 1)
> +            pix_fmt = AV_PIX_FMT_YUV440P;
> +        else
> +            av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +    }
> +
> +    av_log(avctx, AV_LOG_DEBUG, "Av1 decode get format: %s.\n",
> +           av_get_pix_fmt_name(pix_fmt));
> +
> +    if (pix_fmt == AV_PIX_FMT_NONE)
> +        return -1;
> +    s->pix_fmt = pix_fmt;
> +
> +    *fmtp++ = s->pix_fmt;
> +    *fmtp = AV_PIX_FMT_NONE;
> +    ret = ff_thread_get_format(avctx, pix_fmts);
> +    if (ret < 0)
> +        return ret;
> +
> +    avctx->pix_fmt = ret;

s->pix_fmt and avctx->pix_fmt may not be in sync after this.

Do you really need the former at all?

> +
> +    return 0;
> +}
> +
> +static void av1_frame_unref(AVCodecContext *avctx, AV1Frame *f)
> +{
> +    ff_thread_release_buffer(avctx, &f->tf);
> +    av_buffer_unref(&f->hwaccel_priv_buf);
> +    f->hwaccel_picture_private = NULL;
> +}
> +
> +static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, AV1Frame *src)
> +{
> +    int ret = 0;
> +
> +    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (src->hwaccel_picture_private) {
> +        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
> +        if (!dst->hwaccel_priv_buf)
> +            goto fail;
> +        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
> +    }
> +
> +    dst->loop_filter_delta_enabled = src->loop_filter_delta_enabled;
> +    memcpy(dst->loop_filter_ref_deltas,
> +           src->loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +    memcpy(dst->loop_filter_mode_deltas,
> +           src->loop_filter_mode_deltas,
> +           2 * sizeof(int8_t));
> +    memcpy(dst->gm_type,
> +           src->gm_type,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(uint8_t));
> +    memcpy(dst->gm_params,
> +           src->gm_params,
> +           AV1_TOTAL_REFS_PER_FRAME * 6 * sizeof(int8_t));
> +
> +    return 0;
> +
> +fail:
> +    av1_frame_unref(avctx, dst);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static av_cold int av1_decode_free(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> +        if (s->ref[i].tf.f->buf[0])
> +            av1_frame_unref(avctx, &s->ref[i]);
> +        av_frame_free(&s->ref[i].tf.f);
> +    }
> +    if (s->cur_frame.tf.f->buf[0])
> +        av1_frame_unref(avctx, &s->cur_frame);
> +    av_frame_free(&s->cur_frame.tf.f);
> +
> +    ff_cbs_fragment_free(&s->current_obu);
> +    ff_cbs_close(&s->cbc);
> +
> +    return 0;
> +}
> +
> +static av_cold int av1_decode_init(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +    int ret = 0;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> +        s->ref[i].tf.f = av_frame_alloc();
> +        if (!s->ref[i].tf.f) {
> +            av1_decode_free(avctx);
> +            av_log(avctx, AV_LOG_ERROR,
> +                   "Fail to allocate frame buffer %d.\n", i);
> +            return AVERROR(ENOMEM);
> +        }
> +    }
> +
> +        s->cur_frame.tf.f = av_frame_alloc();
> +        if (!s->cur_frame.tf.f) {
> +            av1_decode_free(avctx);
> +            av_log(avctx, AV_LOG_ERROR, "Fail to allocate frame buffer.\n");
> +            return AVERROR(ENOMEM);
> +        }

Wrong indentation.

> +
> +    s->avctx = avctx;
> +    s->pix_fmt = AV_PIX_FMT_NONE;
> +
> +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
> +    if (ret < 0)
> +        return ret;
> +    return 0;
> +}
> +
> +static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
> +{
> +    int ret;
> +    if ((ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
> +        return ret;
> +
> +    if (avctx->hwaccel) {
> +        const AVHWAccel *hwaccel = avctx->hwaccel;
> +        if (hwaccel->frame_priv_data_size) {
> +            f->hwaccel_priv_buf =
> +                av_buffer_allocz(hwaccel->frame_priv_data_size);
> +            if (!f->hwaccel_priv_buf)
> +                goto fail;
> +            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
> +        }
> +    }
> +    return 0;
> +
> +fail:
> +    av1_frame_unref(avctx, f);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static int set_output_frame(AVFrame *srcframe, AVFrame *frame, AVPacket *pkt)
> +{
> +    int ret = 0;
> +    if ((ret = av_frame_ref(frame, srcframe)) < 0) {
> +        return ret;
> +    }
> +
> +    frame->pts = pkt->pts;
> +    frame->pkt_dts = pkt->dts;

Did you add this because you saw bogus timestamps filled by
ff_thread_get_buffer? If so, could you try applying
https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=2057 and
removing this chunk, to see if it fixes it?

> +
> +    return 0;
> +}
> +
> +static int update_reference_list (AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    AV1RawFrameHeader *header= &s->raw_frame_header;
> +    int i;
> +    int ret = 0;
> +
> +    for (i = 0; i < 8; i++) {
> +        if (header->frame_type == AV1_FRAME_KEY ||
> +            (header->refresh_frame_flags & (1 << i))) {
> +            if (s->ref[i].tf.f->buf[0])
> +                av1_frame_unref(avctx, &s->ref[i]);
> +            if ((ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame)) < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Ref frame error:%d.\n", ret);
> +                return ret;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int get_current_frame(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int ret = 0;
> +
> +    if (s->cur_frame.tf.f->buf[0])
> +        av1_frame_unref(avctx, &s->cur_frame);
> +
> +    ret = av1_frame_alloc(avctx, &s->cur_frame);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Get current frame fail.\n");
> +        return ret;
> +    }
> +
> +    if (s->raw_frame_header.primary_ref_frame == AV1_PRIMARY_REF_NONE)
> +        setup_past_independence(&s->cur_frame);
> +    else
> +        load_previous_and_update(s);
> +
> +    global_motion_params(s);
> +
> +    return ret;
> +}
> +
> +static int decode_current_frame(AVCodecContext *avctx)
> +{
> +    int ret = 0;
> +
> +    if (avctx->hwaccel) {
> +        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);

You should pass this a buffer. Some hwaccel implementations may need it.

> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "HW accel strat frame fail.\n");
> +            return ret;
> +        }
> +
> +        ret = avctx->hwaccel->end_frame(avctx);

Shouldn't hwaccel->decode_slice() be called before this, and not after?

> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
> +            return ret;
> +        }
> +    } else {
> +        av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport hardware "
> +               "acceleration AV1 decode.\n");
> +        return -1;
> +    }
> +
> +    ret = update_reference_list(avctx);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to update reference list.\n");
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +static int av1_decode_frame(AVCodecContext *avctx, void *frame,
> +                            int *got_frame, AVPacket *pkt)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    AV1RawSequenceHeader* raw_seq = NULL;
> +    AV1RawFrameHeader *raw_frame_header = NULL;
> +    AV1RawTileGroup *raw_tile_group = NULL;
> +    uint32_t tile_group_offset = 0;

Should be ptrdiff_t.

> +    int i;

Just do for (int i...) below.

> +    int ret = 0;
> +
> +    ret = ff_cbs_read_packet(s->cbc, &s->current_obu, pkt);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to read packet.\n");
> +        goto end;
> +    }
> +    av_log(avctx, AV_LOG_DEBUG, "Total obu for this frame:%d.\n",
> +           s->current_obu.nb_units);
> +
> +    for (i = 0; i < s->current_obu.nb_units; i++) {
> +        CodedBitstreamUnit *unit = &s->current_obu.units[i];
> +        AV1RawOBU *obu = unit->content;
> +        av_log(avctx, AV_LOG_DEBUG, "Obu idx:%d, obu type:%d.\n", i, unit->type);
> +
> +        switch (unit->type) {
> +        case AV1_OBU_SEQUENCE_HEADER:
> +            raw_seq = &obu->obu.sequence_header;
> +            memcpy(&s->raw_seq, raw_seq, sizeof(AV1RawSequenceHeader));

You can use
(CodedBitstreamAV1Context*)(s->cbc->priv_data)->sequence_header
for this instead. And if you'd rather not, you can always create a new
reference to unit->content_ref and store it in AV1DecContext.

Also, consider adding a avctx->hwaccel->decode_params() call here as
well, in case some future hwaccel decides/requires to implement it.

> +            if (s->pix_fmt == AV_PIX_FMT_NONE) {
> +                ret = get_pixel_format(avctx);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "Fail to get format from sequence header.\n");
> +                    goto end;
> +                }
> +            }
> +            break;
> +        case AV1_OBU_FRAME_HEADER:
> +        case AV1_OBU_REDUNDANT_FRAME_HEADER:
> +            raw_frame_header = &obu->obu.frame_header;
> +            memcpy (&s->raw_frame_header,
> +                    raw_frame_header,
> +                    sizeof(AV1RawFrameHeader));

Same as above, you can create a new reference to unit->content_ref
instead. memcpy should be avoided as much as possible, especially in
this case which is run on every frame.

> +            s->tile_num =
> +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> +            if (raw_frame_header->show_existing_frame) {
> +                if (set_output_frame(s->ref[raw_frame_header->frame_to_show_map_idx].tf.f,
> +                                     frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +                *got_frame = 1;
> +                goto end;
> +            } else {
> +                ret = get_current_frame(avctx);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +            }
> +            break;
> +        case AV1_OBU_FRAME:
> +            raw_frame_header = &obu->obu.frame.header;
> +            raw_tile_group = &obu->obu.frame.tile_group;
> +            memcpy(&s->raw_frame_header,
> +                   raw_frame_header,
> +                   sizeof(AV1RawFrameHeader));

Ditto.

> +            s->tile_num =
> +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> +
> +            ret = get_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +            if (avctx->hwaccel) {
> +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);

Can't you pass raw_tile_group->tile_data.data or something less broad
than the entire packet?

> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "HW accel decode slice fail.\n");
> +                    return ret;
> +                }
> +            }
> +            ret = decode_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +            if (raw_frame_header->show_frame) {
> +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +
> +                *got_frame = 1;
> +            }
> +            break;
> +        case AV1_OBU_TILE_GROUP:

You can merge this and the frame/frame_header cases above, letting it
fall through at the point it would start reading the AV1_OBU_TILE_GROUP
bits in the former case.

> +            raw_tile_group = &obu->obu.tile_group;
> +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> +
> +            if (avctx->hwaccel) {
> +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "HW accel decode slice fail.\n");
> +                    return ret;
> +                }
> +            }
> +            if (s->tile_num != raw_tile_group->tg_end + 1)
> +                break;
> +            ret = decode_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +
> +            if (raw_frame_header->show_frame) {
> +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +
> +                *got_frame = 1;
> +            }
> +            break;
> +        case AV1_OBU_TILE_LIST:
> +        case AV1_OBU_TEMPORAL_DELIMITER:
> +        case AV1_OBU_PADDING:
> +        case AV1_OBU_METADATA:
> +            break;
> +        default:
> +            av_log(avctx, AV_LOG_DEBUG,
> +                   "Un-implemented obu type: %d (%zd bits).\n",

Unknown, rather than unimplemented.

> +                   unit->type, unit->data_size);
> +        }
> +    }
> +
> +end:
> +    ff_cbs_fragment_reset(&s->current_obu);
> +    return ret;
> +}
> +
> +static void av1_decode_flush(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++)
> +        av1_frame_unref(avctx, &s->ref[i]);
> +
> +    av1_frame_unref(avctx, &s->cur_frame);
> +}
> +
> +AVCodec ff_av1_decoder = {
> +    .name                  = "av1",
> +    .long_name             = NULL_IF_CONFIG_SMALL("Hardware Acceleration AV1"),

"Alliance for Open Media AV1" or something like that. The fact that the
decoder is currently backed only by hwaccels is irrelevant to its name.

> +    .type                  = AVMEDIA_TYPE_VIDEO,
> +    .id                    = AV_CODEC_ID_AV1,
> +    .priv_data_size        = sizeof(AV1DecContext),
> +    .init                  = av1_decode_init,
> +    .close                 = av1_decode_free,
> +    .decode                = av1_decode_frame,
> +    .capabilities          = AV_CODEC_CAP_DR1,

Probably should add caps_internal like FF_CODEC_CAP_INIT_THREADSAFE and
FF_CODEC_CAP_INIT_CLEANUP.

> +    .flush                 = av1_decode_flush,
> +    .profiles              = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
> +    .hw_configs            = (const AVCodecHWConfigInternal * []) {
> +        NULL
> +    },
> +};
> diff --git a/libavcodec/av1dec.h b/libavcodec/av1dec.h
> new file mode 100644
> index 0000000000..5cb0e896b7
> --- /dev/null
> +++ b/libavcodec/av1dec.h
> @@ -0,0 +1,89 @@
> +/*
> + * AV1 video decoder
> + * *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_AV1DEC_H
> +#define AVCODEC_AV1DEC_H
> +
> +#include <stdatomic.h>
> +#include "libavutil/buffer.h"
> +#include "libavutil/md5.h"
> +#include "avcodec.h"
> +#include "bswapdsp.h"
> +#include "get_bits.h"
> +#include "av1_parse.h"
> +#include "libavformat/avformat.h"
> +#include "libavformat/av1.h"
> +#include "thread.h"
> +#include "cbs.h"
> +#include "cbs_av1.h"
> +
> +typedef enum AV1FrameRestorationType {
> +    AV1_RESTORE_NONE,
> +    AV1_RESTORE_WIENER,
> +    AV1_RESTORE_SGRPROJ,
> +    AV1_RESTORE_SWITCHABLE,
> +} AV1FrameRestorationType;

Should be added to av1.h instead.

> +
> +typedef struct AV1Frame {
> +    ThreadFrame tf;
> +
> +    AVBufferRef *hwaccel_priv_buf;
> +    void *hwaccel_picture_private;
> +
> +    uint8_t loop_filter_delta_enabled;
> +    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
> +    int8_t  loop_filter_mode_deltas[2];
> +    uint8_t gm_type[AV1_TOTAL_REFS_PER_FRAME];
> +    int32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
> +} AV1Frame;
> +
> +typedef struct TileGroupInfo {
> +    uint32_t tile_group_offset;
> +    uint32_t tile_offset;
> +    uint32_t tile_size;
> +    uint16_t tile_row;
> +    uint16_t tile_column;
> +    uint16_t tile_width_sbs;
> +    uint16_t tile_height_sbs;
> +    uint32_t tile_size_bytes;
> +} TileGroupInfo;
> +
> +typedef struct AV1DecContext {
> +    const AVClass *class;
> +    AVCodecContext *avctx;
> +
> +    enum AVPixelFormat pix_fmt;
> +    CodedBitstreamContext *cbc;
> +    CodedBitstreamFragment current_obu;
> +
> +    AV1RawSequenceHeader raw_seq;
> +    AV1RawFrameHeader raw_frame_header;
> +    AV1RawTileGroup raw_tile_group;
> +    TileGroupInfo tile_group_info[128];
> +    uint16_t tile_num;
> +    uint16_t tg_start;
> +    uint16_t tg_end;
> +
> +    AV1Frame ref[8];
> +    AV1Frame cur_frame;
> +
> +} AV1DecContext;
> +
> +#endif /* AVCODEC_AV1DEC_H */
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index a3f9f828ee..5bdfdce363 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>  #include "libavutil/version.h"
>  
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR 100
> +#define LIBAVCODEC_VERSION_MINOR 101
>  #define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
>
Paul B Mahol Aug. 21, 2020, 2:47 p.m. UTC | #2
On 8/21/20, Fei Wang <fei.w.wang@intel.com> wrote:
> This av1 decoder is now only used for av1 hardware acceleration
> decoder. Consider it can be extend to a local decoder like hevc
> or vp9 in the future, so define its name as "av1" and put it into
> external libraries codec list.
>
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
>  Changelog              |   1 +
>  configure              |   1 +
>  libavcodec/Makefile    |   1 +
>  libavcodec/allcodecs.c |   1 +
>  libavcodec/av1dec.c    | 746 +++++++++++++++++++++++++++++++++++++++++
>  libavcodec/av1dec.h    |  89 +++++

Sorry, but those names are reserved for a real native AV1 decoder.

>  libavcodec/version.h   |   2 +-
>  7 files changed, 840 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/av1dec.c
>  create mode 100644 libavcodec/av1dec.h
>
> diff --git a/Changelog b/Changelog
> index 1efc768387..3ff88cc12f 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -14,6 +14,7 @@ version <next>:
>  - ADPCM Argonaut Games encoder
>  - Argonaut Games ASF muxer
>  - AV1 Low overhead bitstream format demuxer
> +- AV1 decoder (Hardware acceleration used only)
>
>
>  version 4.3:
> diff --git a/configure b/configure
> index 6faff9bc7b..ef71e47c4e 100755
> --- a/configure
> +++ b/configure
> @@ -2685,6 +2685,7 @@ atrac3al_decoder_select="mdct"
>  atrac3p_decoder_select="mdct sinewin"
>  atrac3pal_decoder_select="mdct sinewin"
>  atrac9_decoder_select="mdct"
> +av1_decoder_select="cbs_av1"
>  avrn_decoder_select="exif jpegtables"
>  bink_decoder_select="blockdsp hpeldsp"
>  binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3431ba2dca..2371039bd2 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -764,6 +764,7 @@ OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
>  OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
>  OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
>  OBJS-$(CONFIG_ZMBV_ENCODER)            += zmbvenc.o
> +OBJS-$(CONFIG_AV1_DECODER)             += av1dec.o
>
>  # (AD)PCM decoders/encoders
>  OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 4bd830e5d0..00799d3431 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -761,6 +761,7 @@ extern AVCodec ff_idf_decoder;
>   * above is available */
>  extern AVCodec ff_aac_mf_encoder;
>  extern AVCodec ff_ac3_mf_encoder;
> +extern AVCodec ff_av1_decoder;
>  extern AVCodec ff_h263_v4l2m2m_encoder;
>  extern AVCodec ff_libaom_av1_decoder;
>  extern AVCodec ff_libopenh264_encoder;
> diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
> new file mode 100644
> index 0000000000..ab88ed987a
> --- /dev/null
> +++ b/libavcodec/av1dec.c
> @@ -0,0 +1,746 @@
> +/*
> + * AV1video decoder
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include "avcodec.h"
> +#include "get_bits.h"
> +#include "hwconfig.h"
> +#include "internal.h"
> +#include "profiles.h"
> +#include "thread.h"
> +#include "videodsp.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "av1dec.h"
> +#include "libavformat/av1.h"
> +
> +static void setup_past_independence(AV1Frame *f)
> +{
> +    f->loop_filter_delta_enabled = 1;
> +
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_INTRA] = 1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST2] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST3] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN] = -1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2] = -1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF] = -1;
> +
> +    f->loop_filter_mode_deltas[0] = 0;
> +    f->loop_filter_mode_deltas[1] = 0;
> +}
> +
> +static void load_previous_and_update(AV1DecContext *s)
> +{
> +    int i;
> +
> +    memcpy(s->cur_frame.loop_filter_ref_deltas,
> +
> s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +    memcpy(s->cur_frame.loop_filter_mode_deltas,
> +
> s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_mode_deltas,
> +           2 * sizeof(int8_t));
> +
> +    if (s->raw_frame_header.loop_filter_delta_update) {
> +        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
> +            if (s->raw_frame_header.update_ref_delta[i])
> +                s->cur_frame.loop_filter_ref_deltas[i] =
> +                    s->raw_frame_header.loop_filter_ref_deltas[i];
> +        }
> +
> +        for (i = 0; i < 2; i++) {
> +            if (s->raw_frame_header.update_mode_delta[i])
> +                s->cur_frame.loop_filter_mode_deltas[i] =
> +                    s->raw_frame_header.loop_filter_mode_deltas[i];
> +        }
> +    }
> +
> +    s->cur_frame.loop_filter_delta_enabled =
> +        s->raw_frame_header.loop_filter_delta_enabled;
> +}
> +
> +static uint32_t inverse_recenter(int r, uint32_t v)
> +{
> +    if (v > 2 * r)
> +        return v;
> +    else if (v & 1)
> +        return r - ((v + 1) >> 1);
> +    else
> +        return r + (v >> 1);
> +}
> +
> +static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
> +                                                int mx, int r)
> +{
> +    if ((r << 1) <= mx) {
> +        return inverse_recenter(r, sub_exp);
> +    } else {
> +        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
> +    }
> +}
> +
> +static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
> +                                             int high, int r)
> +{
> +    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r -
> low);
> +    return x + low;
> +}
> +
> +static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
> +{
> +    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx, r;
> +    abs_bits = AV1_GM_ABS_ALPHA_BITS;
> +    prec_bits = AV1_GM_ALPHA_PREC_BITS;
> +
> +    if (idx < 2) {
> +        if (type == AV1_WARP_MODEL_TRANSLATION) {
> +            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
> +                !s->raw_frame_header.allow_high_precision_mv;
> +            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
> +                !s->raw_frame_header.allow_high_precision_mv;
> +        } else {
> +            abs_bits = AV1_GM_ABS_TRANS_BITS;
> +            prec_bits = AV1_GM_TRANS_PREC_BITS;
> +        }
> +    }
> +    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
> +    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
> +    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
> +    mx = 1 << abs_bits;
> +    r = (s->ref[s->raw_frame_header.primary_ref_frame].gm_params[ref][idx]
> +         >> prec_diff) - sub;
> +
> +    s->cur_frame.gm_params[ref][idx] =
> +
> (decode_signed_subexp_with_ref(s->raw_frame_header.gm_params[ref][idx],
> +                                       -mx, mx + 1, r) << prec_diff) +
> round;
> +}
> +
> +/**
> +* update gm type/params, since cbs already implemented part of this
> funcation,
> +* so we don't need to full implement spec.
> +*/
> +static void global_motion_params(AV1DecContext *s)
> +{
> +    const AV1RawFrameHeader *header = &s->raw_frame_header;
> +    int type;
> +    int i, j;
> +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +        s->cur_frame.gm_type[i] = AV1_WARP_MODEL_IDENTITY;
> +        for (j = 0; j <= 5; j++)
> +            s->cur_frame.gm_params[i][j] = (j % 3 == 2) ?
> +                                           1 << AV1_WARPEDMODEL_PREC_BITS :
> 0;
> +    }
> +    if (header->frame_type == AV1_FRAME_KEY ||
> +        header->frame_type == AV1_FRAME_INTRA_ONLY)
> +        return;
> +
> +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +        if (header->is_global[i]) {
> +            if (header->is_rot_zoom[i]) {
> +                type = AV1_WARP_MODEL_ROTZOOM;
> +            } else {
> +                type = header->is_translation[i] ?
> AV1_WARP_MODEL_TRANSLATION
> +                                                 : AV1_WARP_MODEL_AFFINE;
> +            }
> +        } else {
> +            type = AV1_WARP_MODEL_IDENTITY;
> +        }
> +        s->cur_frame.gm_type[i] = type;
> +
> +        if (type >= AV1_WARP_MODEL_ROTZOOM) {
> +            read_global_param(s, type, i, 2);
> +            read_global_param(s, type, i, 3);
> +            if (type == AV1_WARP_MODEL_AFFINE) {
> +                read_global_param(s, type, i, 4);
> +                read_global_param(s, type, i, 5);
> +            } else {
> +                s->cur_frame.gm_params[i][4] = -
> s->cur_frame.gm_params[i][3];
> +                s->cur_frame.gm_params[i][5] = -
> s->cur_frame.gm_params[i][2];
> +            }
> +        }
> +        if (type >= AV1_WARP_MODEL_TRANSLATION) {
> +            read_global_param(s, type, i, 0);
> +            read_global_param(s, type, i, 1);
> +        }
> +    }
> +}
> +
> +static int get_tiles_info(AVCodecContext *avctx,
> +                          AV1RawTileGroup *tile_group,
> +                          uint32_t tile_group_offset)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    GetBitContext gb;
> +    uint16_t tile_num, tile_row, tile_col;
> +    uint16_t mi_cols, mi_rows, sb_cols, sb_rows, tile_width_sb,
> tile_height_sb;
> +    uint32_t size = 0, size_bytes = 0, offset = 0;
> +    int ret = 0;
> +
> +    if ((ret = init_get_bits8(&gb,
> +                              tile_group->tile_data.data,
> +                              tile_group->tile_data.data_size)) < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to initialize bitstream
> reader.\n");
> +        return ret;
> +    }
> +
> +    s->tg_start = tile_group->tg_start;
> +    s->tg_end = tile_group->tg_end;
> +
> +    if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +        mi_cols = 2 * ((s->raw_seq.max_frame_width_minus_1 + 8) >> 3);
> +        mi_rows = 2 * ((s->raw_seq.max_frame_height_minus_1 + 8) >> 3);
> +        sb_cols = s->raw_seq.use_128x128_superblock ? ((mi_cols + 31) >> 5)
> +                                                    : ((mi_cols + 15) >>
> 4);
> +        sb_rows = s->raw_seq.use_128x128_superblock ? ((mi_rows + 31) >> 5)
> +                                                    : ((mi_rows + 15) >>
> 4);
> +        tile_width_sb = (sb_cols + (1 <<
> s->raw_frame_header.tile_cols_log2)
> +                         -1) >> s->raw_frame_header.tile_cols_log2;
> +        tile_height_sb = (sb_rows + (1 <<
> s->raw_frame_header.tile_rows_log2)
> +                         -1) >> s->raw_frame_header.tile_rows_log2;
> +    }
> +
> +    for (tile_num = tile_group->tg_start; tile_num <= tile_group->tg_end;
> tile_num++) {
> +        tile_row = tile_num / s->raw_frame_header.tile_cols;
> +        tile_col = tile_num % s->raw_frame_header.tile_cols;
> +
> +        if (tile_num == tile_group->tg_end) {
> +            s->tile_group_info[tile_num].tile_group_offset =
> tile_group_offset;
> +            s->tile_group_info[tile_num].tile_size = get_bits_left(&gb) /
> 8;
> +            s->tile_group_info[tile_num].tile_offset = offset;
> +            s->tile_group_info[tile_num].tile_row = tile_row;
> +            s->tile_group_info[tile_num].tile_column = tile_col;
> +            s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> +            if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +                s->tile_group_info[tile_num].tile_width_sbs =
> +                    (tile_col + 1) == s->raw_frame_header.tile_cols ?
> +                    sb_cols - tile_col * tile_width_sb :
> +                    tile_width_sb;
> +                s->tile_group_info[tile_num].tile_height_sbs =
> +                    (tile_row + 1) == s->raw_frame_header.tile_rows ?
> +                    sb_rows - tile_row * tile_height_sb :
> +                    tile_height_sb;
> +            } else {
> +                s->tile_group_info[tile_num].tile_width_sbs =
> +                    s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> +                s->tile_group_info[tile_num].tile_height_sbs =
> +                    s->raw_frame_header.height_in_sbs_minus_1[tile_row] +
> 1;
> +            }
> +            return 0;
> +        }
> +        size_bytes = s->raw_frame_header.tile_size_bytes_minus1 + 1;
> +        size = get_bits_le(&gb, size_bytes * 8) + 1;
> +        skip_bits(&gb, size * 8);
> +
> +        offset += size_bytes;
> +
> +        s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> +        s->tile_group_info[tile_num].tile_size = size;
> +        s->tile_group_info[tile_num].tile_offset = offset;
> +        s->tile_group_info[tile_num].tile_row = tile_row;
> +        s->tile_group_info[tile_num].tile_column = tile_col;
> +        s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> +        if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +            s->tile_group_info[tile_num].tile_width_sbs =
> +                (tile_col + 1) == s->raw_frame_header.tile_cols ?
> +                sb_cols - tile_col * tile_width_sb :
> +                tile_width_sb;
> +            s->tile_group_info[tile_num].tile_height_sbs =
> +                (tile_row + 1) == s->raw_frame_header.tile_rows ?
> +                sb_rows - tile_row * tile_height_sb :
> +                tile_height_sb;
> +        } else {
> +            s->tile_group_info[tile_num].tile_width_sbs =
> +                s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> +            s->tile_group_info[tile_num].tile_height_sbs =
> +                s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> +        }
> +        offset += size;
> +    }
> +
> +    return 0;
> +
> +}
> +
> +static int get_pixel_format(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    const AV1RawSequenceHeader *seq = &s->raw_seq;
> +    uint8_t bit_depth;
> +    int ret = 0;
> +    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
> +#define HWACCEL_MAX (0)
> +    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
> +
> +    if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
> +        bit_depth = seq->color_config.twelve_bit ? 12 : 10;
> +    else if (seq->seq_profile <= 2)
> +        bit_depth = seq->color_config.high_bitdepth ? 10 : 8;
> +    else {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "Unknow av1 profile %d.\n", seq->seq_profile);
> +        return -1;
> +    }
> +
> +    if (!seq->color_config.mono_chrome) {
> +        // 4:4:4 x:0 y:0, 4:2:2 x:1 y:0, 4:2:0 x:1 y:1
> +        if (seq->color_config.subsampling_x == 0 &&
> +            seq->color_config.subsampling_y == 0) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV444P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV444P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV444P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel
> format.\n");
> +        } else if (seq->color_config.subsampling_x == 1 &&
> +                   seq->color_config.subsampling_y == 0) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV422P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV422P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV422P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel
> format.\n");
> +        } else if (seq->color_config.subsampling_x == 1 &&
> +                   seq->color_config.subsampling_y == 1) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV420P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV420P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV420P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel
> format.\n");
> +        }
> +    } else {
> +        if (seq->color_config.subsampling_x == 1 &&
> +            seq->color_config.subsampling_y == 1)
> +            pix_fmt = AV_PIX_FMT_YUV440P;
> +        else
> +            av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +    }
> +
> +    av_log(avctx, AV_LOG_DEBUG, "Av1 decode get format: %s.\n",
> +           av_get_pix_fmt_name(pix_fmt));
> +
> +    if (pix_fmt == AV_PIX_FMT_NONE)
> +        return -1;
> +    s->pix_fmt = pix_fmt;
> +
> +    *fmtp++ = s->pix_fmt;
> +    *fmtp = AV_PIX_FMT_NONE;
> +    ret = ff_thread_get_format(avctx, pix_fmts);
> +    if (ret < 0)
> +        return ret;
> +
> +    avctx->pix_fmt = ret;
> +
> +    return 0;
> +}
> +
> +static void av1_frame_unref(AVCodecContext *avctx, AV1Frame *f)
> +{
> +    ff_thread_release_buffer(avctx, &f->tf);
> +    av_buffer_unref(&f->hwaccel_priv_buf);
> +    f->hwaccel_picture_private = NULL;
> +}
> +
> +static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, AV1Frame
> *src)
> +{
> +    int ret = 0;
> +
> +    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (src->hwaccel_picture_private) {
> +        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
> +        if (!dst->hwaccel_priv_buf)
> +            goto fail;
> +        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
> +    }
> +
> +    dst->loop_filter_delta_enabled = src->loop_filter_delta_enabled;
> +    memcpy(dst->loop_filter_ref_deltas,
> +           src->loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +    memcpy(dst->loop_filter_mode_deltas,
> +           src->loop_filter_mode_deltas,
> +           2 * sizeof(int8_t));
> +    memcpy(dst->gm_type,
> +           src->gm_type,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(uint8_t));
> +    memcpy(dst->gm_params,
> +           src->gm_params,
> +           AV1_TOTAL_REFS_PER_FRAME * 6 * sizeof(int8_t));
> +
> +    return 0;
> +
> +fail:
> +    av1_frame_unref(avctx, dst);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static av_cold int av1_decode_free(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> +        if (s->ref[i].tf.f->buf[0])
> +            av1_frame_unref(avctx, &s->ref[i]);
> +        av_frame_free(&s->ref[i].tf.f);
> +    }
> +    if (s->cur_frame.tf.f->buf[0])
> +        av1_frame_unref(avctx, &s->cur_frame);
> +    av_frame_free(&s->cur_frame.tf.f);
> +
> +    ff_cbs_fragment_free(&s->current_obu);
> +    ff_cbs_close(&s->cbc);
> +
> +    return 0;
> +}
> +
> +static av_cold int av1_decode_init(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +    int ret = 0;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> +        s->ref[i].tf.f = av_frame_alloc();
> +        if (!s->ref[i].tf.f) {
> +            av1_decode_free(avctx);
> +            av_log(avctx, AV_LOG_ERROR,
> +                   "Fail to allocate frame buffer %d.\n", i);
> +            return AVERROR(ENOMEM);
> +        }
> +    }
> +
> +        s->cur_frame.tf.f = av_frame_alloc();
> +        if (!s->cur_frame.tf.f) {
> +            av1_decode_free(avctx);
> +            av_log(avctx, AV_LOG_ERROR, "Fail to allocate frame
> buffer.\n");
> +            return AVERROR(ENOMEM);
> +        }
> +
> +    s->avctx = avctx;
> +    s->pix_fmt = AV_PIX_FMT_NONE;
> +
> +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
> +    if (ret < 0)
> +        return ret;
> +    return 0;
> +}
> +
> +static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
> +{
> +    int ret;
> +    if ((ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF))
> < 0)
> +        return ret;
> +
> +    if (avctx->hwaccel) {
> +        const AVHWAccel *hwaccel = avctx->hwaccel;
> +        if (hwaccel->frame_priv_data_size) {
> +            f->hwaccel_priv_buf =
> +                av_buffer_allocz(hwaccel->frame_priv_data_size);
> +            if (!f->hwaccel_priv_buf)
> +                goto fail;
> +            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
> +        }
> +    }
> +    return 0;
> +
> +fail:
> +    av1_frame_unref(avctx, f);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static int set_output_frame(AVFrame *srcframe, AVFrame *frame, AVPacket
> *pkt)
> +{
> +    int ret = 0;
> +    if ((ret = av_frame_ref(frame, srcframe)) < 0) {
> +        return ret;
> +    }
> +
> +    frame->pts = pkt->pts;
> +    frame->pkt_dts = pkt->dts;
> +
> +    return 0;
> +}
> +
> +static int update_reference_list (AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    AV1RawFrameHeader *header= &s->raw_frame_header;
> +    int i;
> +    int ret = 0;
> +
> +    for (i = 0; i < 8; i++) {
> +        if (header->frame_type == AV1_FRAME_KEY ||
> +            (header->refresh_frame_flags & (1 << i))) {
> +            if (s->ref[i].tf.f->buf[0])
> +                av1_frame_unref(avctx, &s->ref[i]);
> +            if ((ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame)) <
> 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Ref frame error:%d.\n", ret);
> +                return ret;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int get_current_frame(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int ret = 0;
> +
> +    if (s->cur_frame.tf.f->buf[0])
> +        av1_frame_unref(avctx, &s->cur_frame);
> +
> +    ret = av1_frame_alloc(avctx, &s->cur_frame);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Get current frame fail.\n");
> +        return ret;
> +    }
> +
> +    if (s->raw_frame_header.primary_ref_frame == AV1_PRIMARY_REF_NONE)
> +        setup_past_independence(&s->cur_frame);
> +    else
> +        load_previous_and_update(s);
> +
> +    global_motion_params(s);
> +
> +    return ret;
> +}
> +
> +static int decode_current_frame(AVCodecContext *avctx)
> +{
> +    int ret = 0;
> +
> +    if (avctx->hwaccel) {
> +        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "HW accel strat frame fail.\n");
> +            return ret;
> +        }
> +
> +        ret = avctx->hwaccel->end_frame(avctx);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
> +            return ret;
> +        }
> +    } else {
> +        av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport
> hardware "
> +               "acceleration AV1 decode.\n");
> +        return -1;
> +    }
> +
> +    ret = update_reference_list(avctx);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to update reference list.\n");
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +static int av1_decode_frame(AVCodecContext *avctx, void *frame,
> +                            int *got_frame, AVPacket *pkt)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    AV1RawSequenceHeader* raw_seq = NULL;
> +    AV1RawFrameHeader *raw_frame_header = NULL;
> +    AV1RawTileGroup *raw_tile_group = NULL;
> +    uint32_t tile_group_offset = 0;
> +    int i;
> +    int ret = 0;
> +
> +    ret = ff_cbs_read_packet(s->cbc, &s->current_obu, pkt);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to read packet.\n");
> +        goto end;
> +    }
> +    av_log(avctx, AV_LOG_DEBUG, "Total obu for this frame:%d.\n",
> +           s->current_obu.nb_units);
> +
> +    for (i = 0; i < s->current_obu.nb_units; i++) {
> +        CodedBitstreamUnit *unit = &s->current_obu.units[i];
> +        AV1RawOBU *obu = unit->content;
> +        av_log(avctx, AV_LOG_DEBUG, "Obu idx:%d, obu type:%d.\n", i,
> unit->type);
> +
> +        switch (unit->type) {
> +        case AV1_OBU_SEQUENCE_HEADER:
> +            raw_seq = &obu->obu.sequence_header;
> +            memcpy(&s->raw_seq, raw_seq, sizeof(AV1RawSequenceHeader));
> +            if (s->pix_fmt == AV_PIX_FMT_NONE) {
> +                ret = get_pixel_format(avctx);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "Fail to get format from sequence header.\n");
> +                    goto end;
> +                }
> +            }
> +            break;
> +        case AV1_OBU_FRAME_HEADER:
> +        case AV1_OBU_REDUNDANT_FRAME_HEADER:
> +            raw_frame_header = &obu->obu.frame_header;
> +            memcpy (&s->raw_frame_header,
> +                    raw_frame_header,
> +                    sizeof(AV1RawFrameHeader));
> +            s->tile_num =
> +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> +            if (raw_frame_header->show_existing_frame) {
> +                if
> (set_output_frame(s->ref[raw_frame_header->frame_to_show_map_idx].tf.f,
> +                                     frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame
> error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +                *got_frame = 1;
> +                goto end;
> +            } else {
> +                ret = get_current_frame(avctx);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Get current frame
> error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +            }
> +            break;
> +        case AV1_OBU_FRAME:
> +            raw_frame_header = &obu->obu.frame.header;
> +            raw_tile_group = &obu->obu.frame.tile_group;
> +            memcpy(&s->raw_frame_header,
> +                   raw_frame_header,
> +                   sizeof(AV1RawFrameHeader));
> +            s->tile_num =
> +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> +
> +            ret = get_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Get current frame
> error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +            if (avctx->hwaccel) {
> +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data,
> pkt->size);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "HW accel decode slice fail.\n");
> +                    return ret;
> +                }
> +            }
> +            ret = decode_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame
> error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +            if (raw_frame_header->show_frame) {
> +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame
> error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +
> +                *got_frame = 1;
> +            }
> +            break;
> +        case AV1_OBU_TILE_GROUP:
> +            raw_tile_group = &obu->obu.tile_group;
> +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> +
> +            if (avctx->hwaccel) {
> +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data,
> pkt->size);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "HW accel decode slice fail.\n");
> +                    return ret;
> +                }
> +            }
> +            if (s->tile_num != raw_tile_group->tg_end + 1)
> +                break;
> +            ret = decode_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame
> error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +
> +            if (raw_frame_header->show_frame) {
> +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame
> error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +
> +                *got_frame = 1;
> +            }
> +            break;
> +        case AV1_OBU_TILE_LIST:
> +        case AV1_OBU_TEMPORAL_DELIMITER:
> +        case AV1_OBU_PADDING:
> +        case AV1_OBU_METADATA:
> +            break;
> +        default:
> +            av_log(avctx, AV_LOG_DEBUG,
> +                   "Un-implemented obu type: %d (%zd bits).\n",
> +                   unit->type, unit->data_size);
> +        }
> +    }
> +
> +end:
> +    ff_cbs_fragment_reset(&s->current_obu);
> +    return ret;
> +}
> +
> +static void av1_decode_flush(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++)
> +        av1_frame_unref(avctx, &s->ref[i]);
> +
> +    av1_frame_unref(avctx, &s->cur_frame);
> +}
> +
> +AVCodec ff_av1_decoder = {
> +    .name                  = "av1",
> +    .long_name             = NULL_IF_CONFIG_SMALL("Hardware Acceleration
> AV1"),
> +    .type                  = AVMEDIA_TYPE_VIDEO,
> +    .id                    = AV_CODEC_ID_AV1,
> +    .priv_data_size        = sizeof(AV1DecContext),
> +    .init                  = av1_decode_init,
> +    .close                 = av1_decode_free,
> +    .decode                = av1_decode_frame,
> +    .capabilities          = AV_CODEC_CAP_DR1,
> +    .flush                 = av1_decode_flush,
> +    .profiles              = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
> +    .hw_configs            = (const AVCodecHWConfigInternal * []) {
> +        NULL
> +    },
> +};
> diff --git a/libavcodec/av1dec.h b/libavcodec/av1dec.h
> new file mode 100644
> index 0000000000..5cb0e896b7
> --- /dev/null
> +++ b/libavcodec/av1dec.h
> @@ -0,0 +1,89 @@
> +/*
> + * AV1 video decoder
> + * *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#ifndef AVCODEC_AV1DEC_H
> +#define AVCODEC_AV1DEC_H
> +
> +#include <stdatomic.h>
> +#include "libavutil/buffer.h"
> +#include "libavutil/md5.h"
> +#include "avcodec.h"
> +#include "bswapdsp.h"
> +#include "get_bits.h"
> +#include "av1_parse.h"
> +#include "libavformat/avformat.h"
> +#include "libavformat/av1.h"
> +#include "thread.h"
> +#include "cbs.h"
> +#include "cbs_av1.h"
> +
> +typedef enum AV1FrameRestorationType {
> +    AV1_RESTORE_NONE,
> +    AV1_RESTORE_WIENER,
> +    AV1_RESTORE_SGRPROJ,
> +    AV1_RESTORE_SWITCHABLE,
> +} AV1FrameRestorationType;
> +
> +typedef struct AV1Frame {
> +    ThreadFrame tf;
> +
> +    AVBufferRef *hwaccel_priv_buf;
> +    void *hwaccel_picture_private;
> +
> +    uint8_t loop_filter_delta_enabled;
> +    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
> +    int8_t  loop_filter_mode_deltas[2];
> +    uint8_t gm_type[AV1_TOTAL_REFS_PER_FRAME];
> +    int32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
> +} AV1Frame;
> +
> +typedef struct TileGroupInfo {
> +    uint32_t tile_group_offset;
> +    uint32_t tile_offset;
> +    uint32_t tile_size;
> +    uint16_t tile_row;
> +    uint16_t tile_column;
> +    uint16_t tile_width_sbs;
> +    uint16_t tile_height_sbs;
> +    uint32_t tile_size_bytes;
> +} TileGroupInfo;
> +
> +typedef struct AV1DecContext {
> +    const AVClass *class;
> +    AVCodecContext *avctx;
> +
> +    enum AVPixelFormat pix_fmt;
> +    CodedBitstreamContext *cbc;
> +    CodedBitstreamFragment current_obu;
> +
> +    AV1RawSequenceHeader raw_seq;
> +    AV1RawFrameHeader raw_frame_header;
> +    AV1RawTileGroup raw_tile_group;
> +    TileGroupInfo tile_group_info[128];
> +    uint16_t tile_num;
> +    uint16_t tg_start;
> +    uint16_t tg_end;
> +
> +    AV1Frame ref[8];
> +    AV1Frame cur_frame;
> +
> +} AV1DecContext;
> +
> +#endif /* AVCODEC_AV1DEC_H */
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index a3f9f828ee..5bdfdce363 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>  #include "libavutil/version.h"
>
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR 100
> +#define LIBAVCODEC_VERSION_MINOR 101
>  #define LIBAVCODEC_VERSION_MICRO 100
>
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> --
> 2.17.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
James Almer Aug. 21, 2020, 3 p.m. UTC | #3
On 8/21/2020 11:47 AM, Paul B Mahol wrote:
> On 8/21/20, Fei Wang <fei.w.wang@intel.com> wrote:
>> This av1 decoder is now only used for av1 hardware acceleration
>> decoder. Consider it can be extend to a local decoder like hevc
>> or vp9 in the future, so define its name as "av1" and put it into
>> external libraries codec list.
>>
>> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
>> ---
>>  Changelog              |   1 +
>>  configure              |   1 +
>>  libavcodec/Makefile    |   1 +
>>  libavcodec/allcodecs.c |   1 +
>>  libavcodec/av1dec.c    | 746 +++++++++++++++++++++++++++++++++++++++++
>>  libavcodec/av1dec.h    |  89 +++++
> Sorry but those names are reserved for real native AV1 decoder.

If you actually look at the patch, you'll notice that this is basically
the template of what eventually will become the native decoder, that for
now only includes the high level bitstream parsing and the hwaccel hooks
(And thus will only output frames if one is plugged in). So the names
are fine.
Hendrik Leppkes Aug. 21, 2020, 5 p.m. UTC | #4
On Fri, Aug 21, 2020 at 4:43 PM James Almer <jamrial@gmail.com> wrote:
> > +            s->tile_num =
> > +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> > +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> > +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> > +
> > +            ret = get_current_frame(avctx);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> > +                       ret);
> > +                goto end;
> > +            }
> > +            if (avctx->hwaccel) {
> > +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
>
> Can't you pass raw_tile_group->tile_data.data or something less broad
> than the entire packet?
>

This should match whatever the most common hwaccel APIs want; that
said, decode_slice should usually take actual slice/tile data, while
start_frame can be passed an entire packet. That's how other APIs
currently work.
DXVA2 AV1 takes tile data, so I would assume that VAAPI does as well.

In fact, looking at the VAAPI AV1 mappings in the other patch, it
takes that packet and then breaks it apart into tiles. Ideally that
should be handled in the decoder and tiles passed to decode_slice
ready for the hardware.

- Hendrik
Derek Buitenhuis Aug. 21, 2020, 5:05 p.m. UTC | #5
On 21/08/2020 16:00, James Almer wrote:
> If you actually look at the patch, you'll notice that this is basically
> the template of what eventually will become the native decoder, that for
> now only includes the high level bitstream parsing and the hwaccel hooks
> (And thus will only output frames if one is plugged in). So the names
> are fine.

Isn't there overlap with the parser here (global motion params)?

- Derek
James Almer Aug. 21, 2020, 5:21 p.m. UTC | #6
On 8/21/2020 2:05 PM, Derek Buitenhuis wrote:
> On 21/08/2020 16:00, James Almer wrote:
>> If you actually look at the patch, you'll notice that this is basically
>> the template of what eventually will become the native decoder, that for
>> now only includes the high level bitstream parsing and the hwaccel hooks
>> (And thus will only output frames if one is plugged in). So the names
>> are fine.
> 
> Isn't there overlap with the parser here (global motion params)?

cbs_av1 parses global motion params, yes. I'm not sure why this decoder
attempts to do it too, but it acknowledges the fact that it's also done by
CBS. Maybe Fei Wang can explain why the duplication exists.

The cbs_av1 gm params parsing code should be fixed, in any case.

> 
> - Derek
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
James Almer Aug. 21, 2020, 9:58 p.m. UTC | #7
On 8/21/2020 11:42 AM, James Almer wrote:
>> +static int set_output_frame(AVFrame *srcframe, AVFrame *frame, AVPacket *pkt)
>> +{
>> +    int ret = 0;
>> +    if ((ret = av_frame_ref(frame, srcframe)) < 0) {
>> +        return ret;
>> +    }
>> +
>> +    frame->pts = pkt->pts;
>> +    frame->pkt_dts = pkt->dts;
> Did you add this because you saw bogus timestamps filled by
> ff_thread_get_buffer? If so, could you try applying
> https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=2057 and
> removing this chunk, to see if it fixes it?

Actually no, it won't help. The decoder as-is lacks the DELAY flag, so
it's a 1:1 decoder. I guess this is correct.

If you set frame->pkt_dts however, you need to signal it with the
FF_CODEC_CAP_SETS_PKT_DTS AVCodec.caps_internal flag.
Mark Thompson Aug. 22, 2020, 10:01 p.m. UTC | #8
On 21/08/2020 06:29, Fei Wang wrote:
> This av1 decoder is now only used for av1 hardware acceleration
> decoder. Consider it can be extend to a local decoder like hevc
> or vp9 in the future, so define its name as "av1" and put it into
> external libraries codec list.
> 
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
>   Changelog              |   1 +
>   configure              |   1 +
>   libavcodec/Makefile    |   1 +
>   libavcodec/allcodecs.c |   1 +
>   libavcodec/av1dec.c    | 746 +++++++++++++++++++++++++++++++++++++++++
>   libavcodec/av1dec.h    |  89 +++++
>   libavcodec/version.h   |   2 +-
>   7 files changed, 840 insertions(+), 1 deletion(-)
>   create mode 100644 libavcodec/av1dec.c
>   create mode 100644 libavcodec/av1dec.h
> 
> diff --git a/Changelog b/Changelog
> index 1efc768387..3ff88cc12f 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -14,6 +14,7 @@ version <next>:
>   - ADPCM Argonaut Games encoder
>   - Argonaut Games ASF muxer
>   - AV1 Low overhead bitstream format demuxer
> +- AV1 decoder (Hardware acceleration used only)
>   
>   
>   version 4.3:
> diff --git a/configure b/configure
> index 6faff9bc7b..ef71e47c4e 100755
> --- a/configure
> +++ b/configure
> @@ -2685,6 +2685,7 @@ atrac3al_decoder_select="mdct"
>   atrac3p_decoder_select="mdct sinewin"
>   atrac3pal_decoder_select="mdct sinewin"
>   atrac9_decoder_select="mdct"
> +av1_decoder_select="cbs_av1"
>   avrn_decoder_select="exif jpegtables"
>   bink_decoder_select="blockdsp hpeldsp"
>   binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3431ba2dca..2371039bd2 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -764,6 +764,7 @@ OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
>   OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
>   OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
>   OBJS-$(CONFIG_ZMBV_ENCODER)            += zmbvenc.o
> +OBJS-$(CONFIG_AV1_DECODER)             += av1dec.o
>   
>   # (AD)PCM decoders/encoders
>   OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 4bd830e5d0..00799d3431 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -761,6 +761,7 @@ extern AVCodec ff_idf_decoder;
>    * above is available */
>   extern AVCodec ff_aac_mf_encoder;
>   extern AVCodec ff_ac3_mf_encoder;
> +extern AVCodec ff_av1_decoder;
>   extern AVCodec ff_h263_v4l2m2m_encoder;
>   extern AVCodec ff_libaom_av1_decoder;
>   extern AVCodec ff_libopenh264_encoder;
> diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
> new file mode 100644
> index 0000000000..ab88ed987a
> --- /dev/null
> +++ b/libavcodec/av1dec.c
> @@ -0,0 +1,746 @@
> +/*
> + * AV1video decoder
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avcodec.h"
> +#include "get_bits.h"
> +#include "hwconfig.h"
> +#include "internal.h"
> +#include "profiles.h"
> +#include "thread.h"
> +#include "videodsp.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "av1dec.h"
> +#include "libavformat/av1.h"
> +
> +static void setup_past_independence(AV1Frame *f)
> +{
> +    f->loop_filter_delta_enabled = 1;
> +
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_INTRA] = 1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST2] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST3] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN] = -1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF] = 0;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2] = -1;
> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF] = -1;
> +
> +    f->loop_filter_mode_deltas[0] = 0;
> +    f->loop_filter_mode_deltas[1] = 0;
> +}
> +
> +static void load_previous_and_update(AV1DecContext *s)
> +{
> +    int i;
> +
> +    memcpy(s->cur_frame.loop_filter_ref_deltas,
> +           s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +    memcpy(s->cur_frame.loop_filter_mode_deltas,
> +           s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_mode_deltas,
> +           2 * sizeof(int8_t));
> +
> +    if (s->raw_frame_header.loop_filter_delta_update) {
> +        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
> +            if (s->raw_frame_header.update_ref_delta[i])
> +                s->cur_frame.loop_filter_ref_deltas[i] =
> +                    s->raw_frame_header.loop_filter_ref_deltas[i];
> +        }
> +
> +        for (i = 0; i < 2; i++) {
> +            if (s->raw_frame_header.update_mode_delta[i])
> +                s->cur_frame.loop_filter_mode_deltas[i] =
> +                    s->raw_frame_header.loop_filter_mode_deltas[i];
> +        }
> +    }
> +
> +    s->cur_frame.loop_filter_delta_enabled =
> +        s->raw_frame_header.loop_filter_delta_enabled;
> +}
> +
> +static uint32_t inverse_recenter(int r, uint32_t v)
> +{
> +    if (v > 2 * r)
> +        return v;
> +    else if (v & 1)
> +        return r - ((v + 1) >> 1);
> +    else
> +        return r + (v >> 1);
> +}
> +
> +static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
> +                                                int mx, int r)
> +{
> +    if ((r << 1) <= mx) {
> +        return inverse_recenter(r, sub_exp);
> +    } else {
> +        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
> +    }
> +}
> +
> +static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
> +                                             int high, int r)
> +{
> +    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r - low);
> +    return x + low;
> +}
> +
> +static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
> +{
> +    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx, r;
> +    abs_bits = AV1_GM_ABS_ALPHA_BITS;
> +    prec_bits = AV1_GM_ALPHA_PREC_BITS;
> +
> +    if (idx < 2) {
> +        if (type == AV1_WARP_MODEL_TRANSLATION) {
> +            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
> +                !s->raw_frame_header.allow_high_precision_mv;
> +            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
> +                !s->raw_frame_header.allow_high_precision_mv;
> +        } else {
> +            abs_bits = AV1_GM_ABS_TRANS_BITS;
> +            prec_bits = AV1_GM_TRANS_PREC_BITS;
> +        }
> +    }
> +    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
> +    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
> +    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
> +    mx = 1 << abs_bits;
> +    r = (s->ref[s->raw_frame_header.primary_ref_frame].gm_params[ref][idx]
> +         >> prec_diff) - sub;
> +
> +    s->cur_frame.gm_params[ref][idx] =
> +        (decode_signed_subexp_with_ref(s->raw_frame_header.gm_params[ref][idx],
> +                                       -mx, mx + 1, r) << prec_diff) + round;
> +}
> +
> +/**
> +* update gm type/params, since cbs already implemented part of this funcation,
> +* so we don't need to full implement spec.
> +*/
> +static void global_motion_params(AV1DecContext *s)
> +{
> +    const AV1RawFrameHeader *header = &s->raw_frame_header;
> +    int type;
> +    int i, j;
> +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +        s->cur_frame.gm_type[i] = AV1_WARP_MODEL_IDENTITY;
> +        for (j = 0; j <= 5; j++)
> +            s->cur_frame.gm_params[i][j] = (j % 3 == 2) ?
> +                                           1 << AV1_WARPEDMODEL_PREC_BITS : 0;
> +    }
> +    if (header->frame_type == AV1_FRAME_KEY ||
> +        header->frame_type == AV1_FRAME_INTRA_ONLY)
> +        return;
> +
> +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +        if (header->is_global[i]) {
> +            if (header->is_rot_zoom[i]) {
> +                type = AV1_WARP_MODEL_ROTZOOM;
> +            } else {
> +                type = header->is_translation[i] ? AV1_WARP_MODEL_TRANSLATION
> +                                                 : AV1_WARP_MODEL_AFFINE;
> +            }
> +        } else {
> +            type = AV1_WARP_MODEL_IDENTITY;
> +        }
> +        s->cur_frame.gm_type[i] = type;
> +
> +        if (type >= AV1_WARP_MODEL_ROTZOOM) {
> +            read_global_param(s, type, i, 2);
> +            read_global_param(s, type, i, 3);
> +            if (type == AV1_WARP_MODEL_AFFINE) {
> +                read_global_param(s, type, i, 4);
> +                read_global_param(s, type, i, 5);
> +            } else {
> +                s->cur_frame.gm_params[i][4] = - s->cur_frame.gm_params[i][3];
> +                s->cur_frame.gm_params[i][5] = - s->cur_frame.gm_params[i][2];
> +            }
> +        }
> +        if (type >= AV1_WARP_MODEL_TRANSLATION) {
> +            read_global_param(s, type, i, 0);
> +            read_global_param(s, type, i, 1);
> +        }
> +    }
> +}
> +
> +static int get_tiles_info(AVCodecContext *avctx,
> +                          AV1RawTileGroup *tile_group,
> +                          uint32_t tile_group_offset)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    GetBitContext gb;
> +    uint16_t tile_num, tile_row, tile_col;
> +    uint16_t mi_cols, mi_rows, sb_cols, sb_rows, tile_width_sb, tile_height_sb;
> +    uint32_t size = 0, size_bytes = 0, offset = 0;
> +    int ret = 0;
> +
> +    if ((ret = init_get_bits8(&gb,
> +                              tile_group->tile_data.data,
> +                              tile_group->tile_data.data_size)) < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to initialize bitstream reader.\n");
> +        return ret;
> +    }
> +
> +    s->tg_start = tile_group->tg_start;
> +    s->tg_end = tile_group->tg_end;
> +
> +    if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +        mi_cols = 2 * ((s->raw_seq.max_frame_width_minus_1 + 8) >> 3);
> +        mi_rows = 2 * ((s->raw_seq.max_frame_height_minus_1 + 8) >> 3);
> +        sb_cols = s->raw_seq.use_128x128_superblock ? ((mi_cols + 31) >> 5)
> +                                                    : ((mi_cols + 15) >> 4);
> +        sb_rows = s->raw_seq.use_128x128_superblock ? ((mi_rows + 31) >> 5)
> +                                                    : ((mi_rows + 15) >> 4);
> +        tile_width_sb = (sb_cols + (1 << s->raw_frame_header.tile_cols_log2)
> +                         -1) >> s->raw_frame_header.tile_cols_log2;
> +        tile_height_sb = (sb_rows + (1 << s->raw_frame_header.tile_rows_log2)
> +                         -1) >> s->raw_frame_header.tile_rows_log2;

Maybe cbs read should always fill the width/height_in_sbs arrays so that all the special casing for uniform_spacing_flag in this function isn't needed?  It would be trivial to add it there.

> +    }
> +
> +    for (tile_num = tile_group->tg_start; tile_num <= tile_group->tg_end; tile_num++) {
> +        tile_row = tile_num / s->raw_frame_header.tile_cols;
> +        tile_col = tile_num % s->raw_frame_header.tile_cols;
> +
> +        if (tile_num == tile_group->tg_end) {
> +            s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> +            s->tile_group_info[tile_num].tile_size = get_bits_left(&gb) / 8;
> +            s->tile_group_info[tile_num].tile_offset = offset;
> +            s->tile_group_info[tile_num].tile_row = tile_row;
> +            s->tile_group_info[tile_num].tile_column = tile_col;
> +            s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> +            if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +                s->tile_group_info[tile_num].tile_width_sbs =
> +                    (tile_col + 1) == s->raw_frame_header.tile_cols ?
> +                    sb_cols - tile_col * tile_width_sb :
> +                    tile_width_sb;
> +                s->tile_group_info[tile_num].tile_height_sbs =
> +                    (tile_row + 1) == s->raw_frame_header.tile_rows ?
> +                    sb_rows - tile_row * tile_height_sb :
> +                    tile_height_sb;
> +            } else {
> +                s->tile_group_info[tile_num].tile_width_sbs =
> +                    s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> +                s->tile_group_info[tile_num].tile_height_sbs =
> +                    s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> +            }
> +            return 0;
> +        }
> +        size_bytes = s->raw_frame_header.tile_size_bytes_minus1 + 1;
> +        size = get_bits_le(&gb, size_bytes * 8) + 1;
> +        skip_bits(&gb, size * 8);
> +
> +        offset += size_bytes;
> +
> +        s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> +        s->tile_group_info[tile_num].tile_size = size;
> +        s->tile_group_info[tile_num].tile_offset = offset;
> +        s->tile_group_info[tile_num].tile_row = tile_row;
> +        s->tile_group_info[tile_num].tile_column = tile_col;
> +        s->tile_group_info[tile_num].tile_size_bytes = size_bytes;

This would be nicer as a compound literal.

> +        if (s->raw_frame_header.uniform_tile_spacing_flag) {
> +            s->tile_group_info[tile_num].tile_width_sbs =
> +                (tile_col + 1) == s->raw_frame_header.tile_cols ?
> +                sb_cols - tile_col * tile_width_sb :
> +                tile_width_sb;
> +            s->tile_group_info[tile_num].tile_height_sbs =
> +                (tile_row + 1) == s->raw_frame_header.tile_rows ?
> +                sb_rows - tile_row * tile_height_sb :
> +                tile_height_sb;
> +        } else {
> +            s->tile_group_info[tile_num].tile_width_sbs =
> +                s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> +            s->tile_group_info[tile_num].tile_height_sbs =
> +                s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> +        }
> +        offset += size;
> +    }
> +
> +    return 0;
> +
> +}
> +
> +static int get_pixel_format(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    const AV1RawSequenceHeader *seq = &s->raw_seq;
> +    uint8_t bit_depth;
> +    int ret = 0;
> +    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
> +#define HWACCEL_MAX (0)
> +    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
> +
> +    if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
> +        bit_depth = seq->color_config.twelve_bit ? 12 : 10;
> +    else if (seq->seq_profile <= 2)
> +        bit_depth = seq->color_config.high_bitdepth ? 10 : 8;
> +    else {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "Unknow av1 profile %d.\n", seq->seq_profile);
> +        return -1;
> +    }
> +
> +    if (!seq->color_config.mono_chrome) {
> +        // 4:4:4 x:0 y:0, 4:2:2 x:1 y:0, 4:2:0 x:1 y:1
> +        if (seq->color_config.subsampling_x == 0 &&
> +            seq->color_config.subsampling_y == 0) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV444P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV444P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV444P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +        } else if (seq->color_config.subsampling_x == 1 &&
> +                   seq->color_config.subsampling_y == 0) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV422P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV422P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV422P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +        } else if (seq->color_config.subsampling_x == 1 &&
> +                   seq->color_config.subsampling_y == 1) {
> +            if (bit_depth == 8)
> +                pix_fmt = AV_PIX_FMT_YUV420P;
> +            else if (bit_depth == 10)
> +                pix_fmt = AV_PIX_FMT_YUV420P10;
> +            else if (bit_depth == 12)
> +                pix_fmt = AV_PIX_FMT_YUV420P12;
> +            else
> +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +        }
> +    } else {
> +        if (seq->color_config.subsampling_x == 1 &&
> +            seq->color_config.subsampling_y == 1)
> +            pix_fmt = AV_PIX_FMT_YUV440P;
> +        else
> +            av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> +    }
> +
> +    av_log(avctx, AV_LOG_DEBUG, "Av1 decode get format: %s.\n",
> +           av_get_pix_fmt_name(pix_fmt));
> +
> +    if (pix_fmt == AV_PIX_FMT_NONE)
> +        return -1;
> +    s->pix_fmt = pix_fmt;
> +
> +    *fmtp++ = s->pix_fmt;
> +    *fmtp = AV_PIX_FMT_NONE;
> +    ret = ff_thread_get_format(avctx, pix_fmts);

Passing the software format here is bad, because it makes it appear to the user as though it can be decoded.  I think you would be better off setting avctx->sw_pix_fmt to the software format and then only passing the available hardware formats to ff_get_format().

> +    if (ret < 0)
> +        return ret;
> +
> +    avctx->pix_fmt = ret;

(ff_get_format() does make sure the returned value is in the list, which means if the unusable software format is in the list then it will be happy to return it.)

> +
> +    return 0;
> +}
> +
> +static void av1_frame_unref(AVCodecContext *avctx, AV1Frame *f)
> +{
> +    ff_thread_release_buffer(avctx, &f->tf);
> +    av_buffer_unref(&f->hwaccel_priv_buf);
> +    f->hwaccel_picture_private = NULL;
> +}
> +
> +static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, AV1Frame *src)
> +{
> +    int ret = 0;
> +
> +    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (src->hwaccel_picture_private) {
> +        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
> +        if (!dst->hwaccel_priv_buf)
> +            goto fail;
> +        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
> +    }
> +
> +    dst->loop_filter_delta_enabled = src->loop_filter_delta_enabled;
> +    memcpy(dst->loop_filter_ref_deltas,
> +           src->loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +    memcpy(dst->loop_filter_mode_deltas,
> +           src->loop_filter_mode_deltas,
> +           2 * sizeof(int8_t));
> +    memcpy(dst->gm_type,
> +           src->gm_type,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(uint8_t));
> +    memcpy(dst->gm_params,
> +           src->gm_params,
> +           AV1_TOTAL_REFS_PER_FRAME * 6 * sizeof(int8_t));
> +
> +    return 0;
> +
> +fail:
> +    av1_frame_unref(avctx, dst);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static av_cold int av1_decode_free(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> +        if (s->ref[i].tf.f->buf[0])
> +            av1_frame_unref(avctx, &s->ref[i]);
> +        av_frame_free(&s->ref[i].tf.f);
> +    }
> +    if (s->cur_frame.tf.f->buf[0])
> +        av1_frame_unref(avctx, &s->cur_frame);
> +    av_frame_free(&s->cur_frame.tf.f);
> +
> +    ff_cbs_fragment_free(&s->current_obu);
> +    ff_cbs_close(&s->cbc);
> +
> +    return 0;
> +}
> +
> +static av_cold int av1_decode_init(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +    int ret = 0;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> +        s->ref[i].tf.f = av_frame_alloc();
> +        if (!s->ref[i].tf.f) {
> +            av1_decode_free(avctx);
> +            av_log(avctx, AV_LOG_ERROR,
> +                   "Fail to allocate frame buffer %d.\n", i);

In this message and others below you have written "Fail to X" where I think you should probably write "Failed to X".

> +            return AVERROR(ENOMEM);
> +        }
> +    }
> +
> +        s->cur_frame.tf.f = av_frame_alloc();
> +        if (!s->cur_frame.tf.f) {
> +            av1_decode_free(avctx);
> +            av_log(avctx, AV_LOG_ERROR, "Fail to allocate frame buffer.\n");
> +            return AVERROR(ENOMEM);
> +        }

Alignment is off.

> +
> +    s->avctx = avctx;
> +    s->pix_fmt = AV_PIX_FMT_NONE;
> +
> +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
> +    if (ret < 0)
> +        return ret;
> +    return 0;
> +}
> +
> +static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
> +{
> +    int ret;
> +    if ((ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
> +        return ret;
> +
> +    if (avctx->hwaccel) {
> +        const AVHWAccel *hwaccel = avctx->hwaccel;
> +        if (hwaccel->frame_priv_data_size) {
> +            f->hwaccel_priv_buf =
> +                av_buffer_allocz(hwaccel->frame_priv_data_size);
> +            if (!f->hwaccel_priv_buf)
> +                goto fail;
> +            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
> +        }
> +    }
> +    return 0;
> +
> +fail:
> +    av1_frame_unref(avctx, f);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static int set_output_frame(AVFrame *srcframe, AVFrame *frame, AVPacket *pkt)
> +{
> +    int ret = 0;
> +    if ((ret = av_frame_ref(frame, srcframe)) < 0) {
> +        return ret;
> +    }
> +
> +    frame->pts = pkt->pts;
> +    frame->pkt_dts = pkt->dts;
> +
> +    return 0;
> +}
> +
> +static int update_reference_list (AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    AV1RawFrameHeader *header= &s->raw_frame_header;
> +    int i;
> +    int ret = 0;
> +
> +    for (i = 0; i < 8; i++) {
> +        if (header->frame_type == AV1_FRAME_KEY ||
> +            (header->refresh_frame_flags & (1 << i))) {
> +            if (s->ref[i].tf.f->buf[0])
> +                av1_frame_unref(avctx, &s->ref[i]);
> +            if ((ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame)) < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Ref frame error:%d.\n", ret);
> +                return ret;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int get_current_frame(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int ret = 0;
> +
> +    if (s->cur_frame.tf.f->buf[0])
> +        av1_frame_unref(avctx, &s->cur_frame);
> +
> +    ret = av1_frame_alloc(avctx, &s->cur_frame);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Get current frame fail.\n");

This would be an allocation failure?  Maybe "Failed to allocate space for current frame", or something like that.

> +        return ret;
> +    }
> +
> +    if (s->raw_frame_header.primary_ref_frame == AV1_PRIMARY_REF_NONE)
> +        setup_past_independence(&s->cur_frame);
> +    else
> +        load_previous_and_update(s);
> +
> +    global_motion_params(s);
> +
> +    return ret;
> +}
> +
> +static int decode_current_frame(AVCodecContext *avctx)
> +{
> +    int ret = 0;
> +
> +    if (avctx->hwaccel) {
> +        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "HW accel strat frame fail.\n");

Typo "strat".

> +            return ret;
> +        }

start_frame() is in the wrong place here?  All of the calls to decode_slice() for a frame should be between the matching start_frame() and end_frame().  (I guess this works for VAAPI because it doesn't mind what order the parameter buffers go in, but it may matter for others.)

> +
> +        ret = avctx->hwaccel->end_frame(avctx);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
> +            return ret;
> +        }
> +    } else {
> +        av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport hardware "
> +               "acceleration AV1 decode.\n");

I think you can assert on this one, because something must have gone very wrong to end up here.

> +        return -1;
> +    }
> +
> +    ret = update_reference_list(avctx);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to update reference list.\n");
> +        return ret;
> +    }
> +    return 0;
> +}
> +
> +static int av1_decode_frame(AVCodecContext *avctx, void *frame,
> +                            int *got_frame, AVPacket *pkt)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    AV1RawSequenceHeader* raw_seq = NULL;
> +    AV1RawFrameHeader *raw_frame_header = NULL;
> +    AV1RawTileGroup *raw_tile_group = NULL;
> +    uint32_t tile_group_offset = 0;
> +    int i;
> +    int ret = 0;
> +
> +    ret = ff_cbs_read_packet(s->cbc, &s->current_obu, pkt);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Fail to read packet.\n");
> +        goto end;
> +    }
> +    av_log(avctx, AV_LOG_DEBUG, "Total obu for this frame:%d.\n",
> +           s->current_obu.nb_units);
> +
> +    for (i = 0; i < s->current_obu.nb_units; i++) {
> +        CodedBitstreamUnit *unit = &s->current_obu.units[i];
> +        AV1RawOBU *obu = unit->content;
> +        av_log(avctx, AV_LOG_DEBUG, "Obu idx:%d, obu type:%d.\n", i, unit->type);
> +
> +        switch (unit->type) {
> +        case AV1_OBU_SEQUENCE_HEADER:
> +            raw_seq = &obu->obu.sequence_header;
> +            memcpy(&s->raw_seq, raw_seq, sizeof(AV1RawSequenceHeader));
> +            if (s->pix_fmt == AV_PIX_FMT_NONE) {
> +                ret = get_pixel_format(avctx);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "Fail to get format from sequence header.\n");

That message isn't quite right - the more likely failure here is that the hardware setup didn't work.

I think you want to make sure that all of the messages generated when someone tries to use this decoder without appropriate hardware are helpful.  (In both the "no API support" and the "has API support but the hardware isn't capable" cases.)

> +                    goto end;
> +                }
> +            }
> +            break;
> +        case AV1_OBU_FRAME_HEADER:
> +        case AV1_OBU_REDUNDANT_FRAME_HEADER:
> +            raw_frame_header = &obu->obu.frame_header;
> +            memcpy (&s->raw_frame_header,
> +                    raw_frame_header,
> +                    sizeof(AV1RawFrameHeader));
> +            s->tile_num =
> +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> +            if (raw_frame_header->show_existing_frame) {
> +                if (set_output_frame(s->ref[raw_frame_header->frame_to_show_map_idx].tf.f,
> +                                     frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +                *got_frame = 1;
> +                goto end;
> +            } else {
> +                ret = get_current_frame(avctx);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +            }
> +            break;
> +        case AV1_OBU_FRAME:
> +            raw_frame_header = &obu->obu.frame.header;
> +            raw_tile_group = &obu->obu.frame.tile_group;
> +            memcpy(&s->raw_frame_header,
> +                   raw_frame_header,
> +                   sizeof(AV1RawFrameHeader));
> +            s->tile_num =
> +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> +
> +            ret = get_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +            if (avctx->hwaccel) {
> +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "HW accel decode slice fail.\n");
> +                    return ret;
> +                }
> +            }
> +            ret = decode_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +            if (raw_frame_header->show_frame) {
> +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +
> +                *got_frame = 1;
> +            }
> +            break;
> +        case AV1_OBU_TILE_GROUP:
> +            raw_tile_group = &obu->obu.tile_group;
> +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> +
> +            if (avctx->hwaccel) {
> +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
> +                if (ret < 0) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "HW accel decode slice fail.\n");
> +                    return ret;
> +                }
> +            }
> +            if (s->tile_num != raw_tile_group->tg_end + 1)
> +                break;
> +            ret = decode_current_frame(avctx);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> +                       ret);
> +                goto end;
> +            }
> +
> +            if (raw_frame_header->show_frame) {
> +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> +                           ret);
> +                    goto end;
> +                }
> +
> +                *got_frame = 1;
> +            }

This section is the same as the FRAME case.  Maybe move it after the switch and finish decoding the frame if tile_num shows you are at the end of the frame?

> +            break;
> +        case AV1_OBU_TILE_LIST:
> +        case AV1_OBU_TEMPORAL_DELIMITER:
> +        case AV1_OBU_PADDING:
> +        case AV1_OBU_METADATA:
> +            break;
> +        default:
> +            av_log(avctx, AV_LOG_DEBUG,
> +                   "Un-implemented obu type: %d (%zd bits).\n",
> +                   unit->type, unit->data_size);
> +        }
> +    }
> +
> +end:
> +    ff_cbs_fragment_reset(&s->current_obu);
> +    return ret;
> +}
> +
> +static void av1_decode_flush(AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++)
> +        av1_frame_unref(avctx, &s->ref[i]);
> +
> +    av1_frame_unref(avctx, &s->cur_frame);
> +}
> +
> +AVCodec ff_av1_decoder = {
> +    .name                  = "av1",
> +    .long_name             = NULL_IF_CONFIG_SMALL("Hardware Acceleration AV1"),
> +    .type                  = AVMEDIA_TYPE_VIDEO,
> +    .id                    = AV_CODEC_ID_AV1,
> +    .priv_data_size        = sizeof(AV1DecContext),
> +    .init                  = av1_decode_init,
> +    .close                 = av1_decode_free,
> +    .decode                = av1_decode_frame,
> +    .capabilities          = AV_CODEC_CAP_DR1,
> +    .flush                 = av1_decode_flush,
> +    .profiles              = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
> +    .hw_configs            = (const AVCodecHWConfigInternal * []) {
> +        NULL
> +    },
> +};
> diff --git a/libavcodec/av1dec.h b/libavcodec/av1dec.h
> new file mode 100644
> index 0000000000..5cb0e896b7
> --- /dev/null
> +++ b/libavcodec/av1dec.h
> @@ -0,0 +1,89 @@
> +/*
> + * AV1 video decoder
> + * *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_AV1DEC_H
> +#define AVCODEC_AV1DEC_H
> +
> +#include <stdatomic.h>
> +#include "libavutil/buffer.h"
> +#include "libavutil/md5.h"
> +#include "avcodec.h"
> +#include "bswapdsp.h"
> +#include "get_bits.h"
> +#include "av1_parse.h"
> +#include "libavformat/avformat.h"
> +#include "libavformat/av1.h"
> +#include "thread.h"
> +#include "cbs.h"
> +#include "cbs_av1.h"

You should include the minimal number of other headers in the header itself; include them in the .c file if needed.  That said, I think most of these are not used anyway.

> +
> +typedef enum AV1FrameRestorationType {
> +    AV1_RESTORE_NONE,
> +    AV1_RESTORE_WIENER,
> +    AV1_RESTORE_SGRPROJ,
> +    AV1_RESTORE_SWITCHABLE,
> +} AV1FrameRestorationType;
> +
> +typedef struct AV1Frame {
> +    ThreadFrame tf;
> +
> +    AVBufferRef *hwaccel_priv_buf;
> +    void *hwaccel_picture_private;
> +
> +    uint8_t loop_filter_delta_enabled;
> +    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
> +    int8_t  loop_filter_mode_deltas[2];
> +    uint8_t gm_type[AV1_TOTAL_REFS_PER_FRAME];
> +    int32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
> +} AV1Frame;
> +
> +typedef struct TileGroupInfo {
> +    uint32_t tile_group_offset;
> +    uint32_t tile_offset;
> +    uint32_t tile_size;
> +    uint16_t tile_row;
> +    uint16_t tile_column;
> +    uint16_t tile_width_sbs;
> +    uint16_t tile_height_sbs;
> +    uint32_t tile_size_bytes;
> +} TileGroupInfo;
> +
> +typedef struct AV1DecContext {
> +    const AVClass *class;
> +    AVCodecContext *avctx;
> +
> +    enum AVPixelFormat pix_fmt;
> +    CodedBitstreamContext *cbc;
> +    CodedBitstreamFragment current_obu;
> +
> +    AV1RawSequenceHeader raw_seq;
> +    AV1RawFrameHeader raw_frame_header;
> +    AV1RawTileGroup raw_tile_group;
> +    TileGroupInfo tile_group_info[128];

Where did 128 come from?  I'm not aware of any restriction here beyond MAX_TILE_COLS and MAX_TILE_ROWS, but I admit my knowledge of the standard may be lacking.

Regardless, any limit here will need to also be checked in the code above to ensure a malicious stream can't overflow the array.

> +    uint16_t tile_num;
> +    uint16_t tg_start;
> +    uint16_t tg_end;
> +
> +    AV1Frame ref[8];
> +    AV1Frame cur_frame;
> +
> +} AV1DecContext;
> +
> +#endif /* AVCODEC_AV1DEC_H */
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index a3f9f828ee..5bdfdce363 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>   #include "libavutil/version.h"
>   
>   #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR 100
> +#define LIBAVCODEC_VERSION_MINOR 101
>   #define LIBAVCODEC_VERSION_MICRO 100
>   
>   #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> 

Thanks,

- Mark
Fei Wang Aug. 24, 2020, 2:43 a.m. UTC | #9
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of James
> Almer
> Sent: Friday, August 21, 2020 11:01 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH 1/2] avcodec: add av1 hardware
> acceleration decoder
> 
> On 8/21/2020 11:47 AM, Paul B Mahol wrote:
> > On 8/21/20, Fei Wang <fei.w.wang@intel.com> wrote:
> >> This av1 decoder is now only used for av1 hardware acceleration
> >> decoder. Consider it can be extend to a local decoder like hevc or
> >> vp9 in the future, so define its name as "av1" and put it into
> >> external libraries codec list.
> >>
> >> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> >> ---
> >>  Changelog              |   1 +
> >>  configure              |   1 +
> >>  libavcodec/Makefile    |   1 +
> >>  libavcodec/allcodecs.c |   1 +
> >>  libavcodec/av1dec.c    | 746
> +++++++++++++++++++++++++++++++++++++++++
> >>  libavcodec/av1dec.h    |  89 +++++
> > Sorry but those names are reserved for real native AV1 decoder.
> 
> If you actually look at the patch, you'll notice that this is basically the template
> of what eventually will become the native decoder, that for now only includes
> the high level bitstream parsing and the hwaccel hooks (And thus will only output
> frames if one is plugged in). So the names are fine.
Yes, the framework of this decoder is kept the same as that of other decoders like hevc/vp9,
which means it will be easy to extend into a native decoder.

Fei
Thanks
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org
> with subject "unsubscribe".
James Almer Aug. 24, 2020, 4:14 a.m. UTC | #10
On 8/21/2020 2:29 AM, Fei Wang wrote:
> +static int update_reference_list (AVCodecContext *avctx)
> +{
> +    AV1DecContext *s = avctx->priv_data;
> +    AV1RawFrameHeader *header= &s->raw_frame_header;
> +    int i;
> +    int ret = 0;
> +
> +    for (i = 0; i < 8; i++) {

Use AV1_NUM_REF_FRAMES instead of 8.

> +        if (header->frame_type == AV1_FRAME_KEY ||

There's no need to check for this, and it's in fact wrong (AV1_FRAME_KEY
where show_frame == 0 does not necessarily refresh all frames).

refresh_frame_flags will have the correct value for the frame as filled
by CBS.

> +            (header->refresh_frame_flags & (1 << i))) {
> +            if (s->ref[i].tf.f->buf[0])
> +                av1_frame_unref(avctx, &s->ref[i]);
> +            if ((ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame)) < 0) {
> +                av_log(avctx, AV_LOG_DEBUG, "Ref frame error:%d.\n", ret);
> +                return ret;
> +            }
> +        }
> +    }
> +    return 0;
> +}
Fei Wang Aug. 24, 2020, 6:57 a.m. UTC | #11
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of James
> Almer
> Sent: Friday, August 21, 2020 10:43 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH 1/2] avcodec: add av1 hardware
> acceleration decoder
> 
> On 8/21/2020 2:29 AM, Fei Wang wrote:
> > This av1 decoder is now only used for av1 hardware acceleration
> > decoder. Consider it can be extend to a local decoder like hevc or vp9
> > in the future, so define its name as "av1" and put it into external
> > libraries codec list.
> >
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> >  Changelog              |   1 +
> >  configure              |   1 +
> >  libavcodec/Makefile    |   1 +
> >  libavcodec/allcodecs.c |   1 +
> >  libavcodec/av1dec.c    | 746
> +++++++++++++++++++++++++++++++++++++++++
> >  libavcodec/av1dec.h    |  89 +++++
> >  libavcodec/version.h   |   2 +-
> >  7 files changed, 840 insertions(+), 1 deletion(-)  create mode 100644
> > libavcodec/av1dec.c  create mode 100644 libavcodec/av1dec.h
> >
> > diff --git a/Changelog b/Changelog
> > index 1efc768387..3ff88cc12f 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -14,6 +14,7 @@ version <next>:
> >  - ADPCM Argonaut Games encoder
> >  - Argonaut Games ASF muxer
> >  - AV1 Low overhead bitstream format demuxer
> > +- AV1 decoder (Hardware acceleration used only)
> >
> >
> >  version 4.3:
> > diff --git a/configure b/configure
> > index 6faff9bc7b..ef71e47c4e 100755
> > --- a/configure
> > +++ b/configure
> > @@ -2685,6 +2685,7 @@ atrac3al_decoder_select="mdct"
> >  atrac3p_decoder_select="mdct sinewin"
> >  atrac3pal_decoder_select="mdct sinewin"
> >  atrac9_decoder_select="mdct"
> > +av1_decoder_select="cbs_av1"
> >  avrn_decoder_select="exif jpegtables"
> >  bink_decoder_select="blockdsp hpeldsp"
> >  binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile index
> > 3431ba2dca..2371039bd2 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -764,6 +764,7 @@ OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
> >  OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
> >  OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
> >  OBJS-$(CONFIG_ZMBV_ENCODER)            += zmbvenc.o
> > +OBJS-$(CONFIG_AV1_DECODER)             += av1dec.o
> >
> >  # (AD)PCM decoders/encoders
> >  OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
> > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index
> > 4bd830e5d0..00799d3431 100644
> > --- a/libavcodec/allcodecs.c
> > +++ b/libavcodec/allcodecs.c
> > @@ -761,6 +761,7 @@ extern AVCodec ff_idf_decoder;
> >   * above is available */
> >  extern AVCodec ff_aac_mf_encoder;
> >  extern AVCodec ff_ac3_mf_encoder;
> > +extern AVCodec ff_av1_decoder;
> >  extern AVCodec ff_h263_v4l2m2m_encoder;  extern AVCodec
> > ff_libaom_av1_decoder;  extern AVCodec ff_libopenh264_encoder; diff
> > --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c new file mode 100644
> > index 0000000000..ab88ed987a
> > --- /dev/null
> > +++ b/libavcodec/av1dec.c
> > @@ -0,0 +1,746 @@
> > +/*
> > + * AV1video decoder
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > +02110-1301 USA  */
> > +
> > +#include "avcodec.h"
> > +#include "get_bits.h"
> > +#include "hwconfig.h"
> > +#include "internal.h"
> > +#include "profiles.h"
> > +#include "thread.h"
> > +#include "videodsp.h"
> > +#include "libavutil/avassert.h"
> > +#include "libavutil/pixdesc.h"
> > +
> > +#include "av1dec.h"
> > +#include "libavformat/av1.h"
> > +
> > +static void setup_past_independence(AV1Frame *f) {
> > +    f->loop_filter_delta_enabled = 1;
> > +
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_INTRA] = 1;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST2] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST3] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN] = -1;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2] = -1;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF] = -1;
> > +
> > +    f->loop_filter_mode_deltas[0] = 0;
> > +    f->loop_filter_mode_deltas[1] = 0; }
> > +
> > +static void load_previous_and_update(AV1DecContext *s) {
> > +    int i;
> > +
> > +    memcpy(s->cur_frame.loop_filter_ref_deltas,
> > +           s->ref[s-
> >raw_frame_header.primary_ref_frame].loop_filter_ref_deltas,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> > +    memcpy(s->cur_frame.loop_filter_mode_deltas,
> > +           s->ref[s-
> >raw_frame_header.primary_ref_frame].loop_filter_mode_deltas,
> > +           2 * sizeof(int8_t));
> > +
> > +    if (s->raw_frame_header.loop_filter_delta_update) {
> > +        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
> > +            if (s->raw_frame_header.update_ref_delta[i])
> > +                s->cur_frame.loop_filter_ref_deltas[i] =
> > +                    s->raw_frame_header.loop_filter_ref_deltas[i];
> > +        }
> > +
> > +        for (i = 0; i < 2; i++) {
> > +            if (s->raw_frame_header.update_mode_delta[i])
> > +                s->cur_frame.loop_filter_mode_deltas[i] =
> > +                    s->raw_frame_header.loop_filter_mode_deltas[i];
> > +        }
> > +    }
> > +
> > +    s->cur_frame.loop_filter_delta_enabled =
> > +        s->raw_frame_header.loop_filter_delta_enabled;
> > +}
> > +
> > +static uint32_t inverse_recenter(int r, uint32_t v) {
> > +    if (v > 2 * r)
> > +        return v;
> > +    else if (v & 1)
> > +        return r - ((v + 1) >> 1);
> > +    else
> > +        return r + (v >> 1);
> > +}
> > +
> > +static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
> > +                                                int mx, int r) {
> > +    if ((r << 1) <= mx) {
> > +        return inverse_recenter(r, sub_exp);
> > +    } else {
> > +        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
> > +    }
> > +}
> > +
> > +static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
> > +                                             int high, int r) {
> > +    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r - low);
> > +    return x + low;
> > +}
> > +
> > +static void read_global_param(AV1DecContext *s, int type, int ref,
> > +int idx) {
> > +    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx, r;
> > +    abs_bits = AV1_GM_ABS_ALPHA_BITS;
> > +    prec_bits = AV1_GM_ALPHA_PREC_BITS;
> > +
> > +    if (idx < 2) {
> > +        if (type == AV1_WARP_MODEL_TRANSLATION) {
> > +            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
> > +                !s->raw_frame_header.allow_high_precision_mv;
> > +            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
> > +                !s->raw_frame_header.allow_high_precision_mv;
> > +        } else {
> > +            abs_bits = AV1_GM_ABS_TRANS_BITS;
> > +            prec_bits = AV1_GM_TRANS_PREC_BITS;
> > +        }
> > +    }
> > +    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
> > +    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
> > +    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
> > +    mx = 1 << abs_bits;
> > +    r = (s->ref[s->raw_frame_header.primary_ref_frame].gm_params[ref][idx]
> > +         >> prec_diff) - sub;
> > +
> > +    s->cur_frame.gm_params[ref][idx] =
> > +        (decode_signed_subexp_with_ref(s-
> >raw_frame_header.gm_params[ref][idx],
> > +                                       -mx, mx + 1, r) << prec_diff)
> > ++ round; }
> > +
> > +/**
> > +* update gm type/params, since cbs already implemented part of this
> > +funcation,
> > +* so we don't need to full implement spec.
> 
> FYI, the gm params code in cbs_av1 is currently buggy for some streams, and it
> needs fixing.
Thanks for your information. I will keep it in mind. For the clips (more than 200)
that I tested, everything looks good and I haven't encountered any bugs from cbs_av1 yet.

Thanks
Fei
> 
> > +*/
> > +static void global_motion_params(AV1DecContext *s) {
> > +    const AV1RawFrameHeader *header = &s->raw_frame_header;
> > +    int type;
> > +    int i, j;
> > +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> > +        s->cur_frame.gm_type[i] = AV1_WARP_MODEL_IDENTITY;
> > +        for (j = 0; j <= 5; j++)
> > +            s->cur_frame.gm_params[i][j] = (j % 3 == 2) ?
> > +                                           1 << AV1_WARPEDMODEL_PREC_BITS : 0;
> > +    }
> > +    if (header->frame_type == AV1_FRAME_KEY ||
> > +        header->frame_type == AV1_FRAME_INTRA_ONLY)
> > +        return;
> > +
> > +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> > +        if (header->is_global[i]) {
> > +            if (header->is_rot_zoom[i]) {
> > +                type = AV1_WARP_MODEL_ROTZOOM;
> > +            } else {
> > +                type = header->is_translation[i] ?
> AV1_WARP_MODEL_TRANSLATION
> > +                                                 : AV1_WARP_MODEL_AFFINE;
> > +            }
> > +        } else {
> > +            type = AV1_WARP_MODEL_IDENTITY;
> > +        }
> > +        s->cur_frame.gm_type[i] = type;
> > +
> > +        if (type >= AV1_WARP_MODEL_ROTZOOM) {
> > +            read_global_param(s, type, i, 2);
> > +            read_global_param(s, type, i, 3);
> > +            if (type == AV1_WARP_MODEL_AFFINE) {
> > +                read_global_param(s, type, i, 4);
> > +                read_global_param(s, type, i, 5);
> > +            } else {
> > +                s->cur_frame.gm_params[i][4] = - s->cur_frame.gm_params[i][3];
> > +                s->cur_frame.gm_params[i][5] = - s->cur_frame.gm_params[i][2];
> > +            }
> > +        }
> > +        if (type >= AV1_WARP_MODEL_TRANSLATION) {
> > +            read_global_param(s, type, i, 0);
> > +            read_global_param(s, type, i, 1);
> > +        }
> > +    }
> > +}
> > +
> > +static int get_tiles_info(AVCodecContext *avctx,
> > +                          AV1RawTileGroup *tile_group,
> > +                          uint32_t tile_group_offset) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    GetBitContext gb;
> > +    uint16_t tile_num, tile_row, tile_col;
> > +    uint16_t mi_cols, mi_rows, sb_cols, sb_rows, tile_width_sb, tile_height_sb;
> > +    uint32_t size = 0, size_bytes = 0, offset = 0;
> > +    int ret = 0;
> > +
> > +    if ((ret = init_get_bits8(&gb,
> > +                              tile_group->tile_data.data,
> > +                              tile_group->tile_data.data_size)) < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Fail to initialize bitstream reader.\n");
> > +        return ret;
> > +    }
> > +
> > +    s->tg_start = tile_group->tg_start;
> > +    s->tg_end = tile_group->tg_end;
> > +
> > +    if (s->raw_frame_header.uniform_tile_spacing_flag) {
> > +        mi_cols = 2 * ((s->raw_seq.max_frame_width_minus_1 + 8) >> 3);
> > +        mi_rows = 2 * ((s->raw_seq.max_frame_height_minus_1 + 8) >> 3);
> > +        sb_cols = s->raw_seq.use_128x128_superblock ? ((mi_cols + 31) >> 5)
> > +                                                    : ((mi_cols + 15) >> 4);
> > +        sb_rows = s->raw_seq.use_128x128_superblock ? ((mi_rows + 31) >> 5)
> > +                                                    : ((mi_rows + 15) >> 4);
> > +        tile_width_sb = (sb_cols + (1 << s->raw_frame_header.tile_cols_log2)
> > +                         -1) >> s->raw_frame_header.tile_cols_log2;
> > +        tile_height_sb = (sb_rows + (1 << s->raw_frame_header.tile_rows_log2)
> > +                         -1) >> s->raw_frame_header.tile_rows_log2;
> > +    }
> > +
> > +    for (tile_num = tile_group->tg_start; tile_num <= tile_group->tg_end;
> tile_num++) {
> > +        tile_row = tile_num / s->raw_frame_header.tile_cols;
> > +        tile_col = tile_num % s->raw_frame_header.tile_cols;
> > +
> > +        if (tile_num == tile_group->tg_end) {
> > +            s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> > +            s->tile_group_info[tile_num].tile_size = get_bits_left(&gb) / 8;
> > +            s->tile_group_info[tile_num].tile_offset = offset;
> > +            s->tile_group_info[tile_num].tile_row = tile_row;
> > +            s->tile_group_info[tile_num].tile_column = tile_col;
> > +            s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> > +            if (s->raw_frame_header.uniform_tile_spacing_flag) {
> > +                s->tile_group_info[tile_num].tile_width_sbs =
> > +                    (tile_col + 1) == s->raw_frame_header.tile_cols ?
> > +                    sb_cols - tile_col * tile_width_sb :
> > +                    tile_width_sb;
> > +                s->tile_group_info[tile_num].tile_height_sbs =
> > +                    (tile_row + 1) == s->raw_frame_header.tile_rows ?
> > +                    sb_rows - tile_row * tile_height_sb :
> > +                    tile_height_sb;
> > +            } else {
> > +                s->tile_group_info[tile_num].tile_width_sbs =
> > +                    s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> > +                s->tile_group_info[tile_num].tile_height_sbs =
> > +                    s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> > +            }
> > +            return 0;
> > +        }
> > +        size_bytes = s->raw_frame_header.tile_size_bytes_minus1 + 1;
> > +        size = get_bits_le(&gb, size_bytes * 8) + 1;
> > +        skip_bits(&gb, size * 8);
> > +
> > +        offset += size_bytes;
> > +
> > +        s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> > +        s->tile_group_info[tile_num].tile_size = size;
> > +        s->tile_group_info[tile_num].tile_offset = offset;
> > +        s->tile_group_info[tile_num].tile_row = tile_row;
> > +        s->tile_group_info[tile_num].tile_column = tile_col;
> > +        s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> > +        if (s->raw_frame_header.uniform_tile_spacing_flag) {
> > +            s->tile_group_info[tile_num].tile_width_sbs =
> > +                (tile_col + 1) == s->raw_frame_header.tile_cols ?
> > +                sb_cols - tile_col * tile_width_sb :
> > +                tile_width_sb;
> > +            s->tile_group_info[tile_num].tile_height_sbs =
> > +                (tile_row + 1) == s->raw_frame_header.tile_rows ?
> > +                sb_rows - tile_row * tile_height_sb :
> > +                tile_height_sb;
> > +        } else {
> > +            s->tile_group_info[tile_num].tile_width_sbs =
> > +                s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> > +            s->tile_group_info[tile_num].tile_height_sbs =
> > +                s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> > +        }
> > +        offset += size;
> > +    }
> > +
> > +    return 0;
> > +
> > +}
> > +
> > +static int get_pixel_format(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    const AV1RawSequenceHeader *seq = &s->raw_seq;
> > +    uint8_t bit_depth;
> > +    int ret = 0;
> > +    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; #define
> HWACCEL_MAX
> > +(0)
> > +    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
> > +
> > +    if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
> > +        bit_depth = seq->color_config.twelve_bit ? 12 : 10;
> > +    else if (seq->seq_profile <= 2)
> > +        bit_depth = seq->color_config.high_bitdepth ? 10 : 8;
> > +    else {
> > +        av_log(avctx, AV_LOG_ERROR,
> > +               "Unknow av1 profile %d.\n", seq->seq_profile);
> > +        return -1;
> > +    }
> > +
> > +    if (!seq->color_config.mono_chrome) {
> > +        // 4:4:4 x:0 y:0, 4:2:2 x:1 y:0, 4:2:0 x:1 y:1
> > +        if (seq->color_config.subsampling_x == 0 &&
> > +            seq->color_config.subsampling_y == 0) {
> > +            if (bit_depth == 8)
> > +                pix_fmt = AV_PIX_FMT_YUV444P;
> > +            else if (bit_depth == 10)
> > +                pix_fmt = AV_PIX_FMT_YUV444P10;
> > +            else if (bit_depth == 12)
> > +                pix_fmt = AV_PIX_FMT_YUV444P12;
> > +            else
> > +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +        } else if (seq->color_config.subsampling_x == 1 &&
> > +                   seq->color_config.subsampling_y == 0) {
> > +            if (bit_depth == 8)
> > +                pix_fmt = AV_PIX_FMT_YUV422P;
> > +            else if (bit_depth == 10)
> > +                pix_fmt = AV_PIX_FMT_YUV422P10;
> > +            else if (bit_depth == 12)
> > +                pix_fmt = AV_PIX_FMT_YUV422P12;
> > +            else
> > +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +        } else if (seq->color_config.subsampling_x == 1 &&
> > +                   seq->color_config.subsampling_y == 1) {
> > +            if (bit_depth == 8)
> > +                pix_fmt = AV_PIX_FMT_YUV420P;
> > +            else if (bit_depth == 10)
> > +                pix_fmt = AV_PIX_FMT_YUV420P10;
> > +            else if (bit_depth == 12)
> > +                pix_fmt = AV_PIX_FMT_YUV420P12;
> > +            else
> > +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +        }
> > +    } else {
> > +        if (seq->color_config.subsampling_x == 1 &&
> > +            seq->color_config.subsampling_y == 1)
> > +            pix_fmt = AV_PIX_FMT_YUV440P;
> > +        else
> > +            av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +    }
> > +
> > +    av_log(avctx, AV_LOG_DEBUG, "Av1 decode get format: %s.\n",
> > +           av_get_pix_fmt_name(pix_fmt));
> > +
> > +    if (pix_fmt == AV_PIX_FMT_NONE)
> > +        return -1;
> > +    s->pix_fmt = pix_fmt;
> > +
> > +    *fmtp++ = s->pix_fmt;
> > +    *fmtp = AV_PIX_FMT_NONE;
> > +    ret = ff_thread_get_format(avctx, pix_fmts);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    avctx->pix_fmt = ret;
> 
> s->pix_fmt and avctx->pix_fmt may not be in sync after this.
> 
> Do you really need the former at all?

s->pix_fmt can be regarded as the reference format that the av1 decoder
wants to decode the current stream to. But the HW may not decode the stream
to the same format as the reference format. So ff_thread_get_format will query
the HW, compare against the reference format, and pick the best format the HW
can provide. avctx->pix_fmt will be the format the HW actually decodes to. So
they can be different. 

> 
> > +
> > +    return 0;
> > +}
> > +
> > +static void av1_frame_unref(AVCodecContext *avctx, AV1Frame *f) {
> > +    ff_thread_release_buffer(avctx, &f->tf);
> > +    av_buffer_unref(&f->hwaccel_priv_buf);
> > +    f->hwaccel_picture_private = NULL; }
> > +
> > +static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst,
> > +AV1Frame *src) {
> > +    int ret = 0;
> > +
> > +    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    if (src->hwaccel_picture_private) {
> > +        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
> > +        if (!dst->hwaccel_priv_buf)
> > +            goto fail;
> > +        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
> > +    }
> > +
> > +    dst->loop_filter_delta_enabled = src->loop_filter_delta_enabled;
> > +    memcpy(dst->loop_filter_ref_deltas,
> > +           src->loop_filter_ref_deltas,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> > +    memcpy(dst->loop_filter_mode_deltas,
> > +           src->loop_filter_mode_deltas,
> > +           2 * sizeof(int8_t));
> > +    memcpy(dst->gm_type,
> > +           src->gm_type,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(uint8_t));
> > +    memcpy(dst->gm_params,
> > +           src->gm_params,
> > +           AV1_TOTAL_REFS_PER_FRAME * 6 * sizeof(int8_t));
> > +
> > +    return 0;
> > +
> > +fail:
> > +    av1_frame_unref(avctx, dst);
> > +    return AVERROR(ENOMEM);
> > +}
> > +
> > +static av_cold int av1_decode_free(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int i;
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> > +        if (s->ref[i].tf.f->buf[0])
> > +            av1_frame_unref(avctx, &s->ref[i]);
> > +        av_frame_free(&s->ref[i].tf.f);
> > +    }
> > +    if (s->cur_frame.tf.f->buf[0])
> > +        av1_frame_unref(avctx, &s->cur_frame);
> > +    av_frame_free(&s->cur_frame.tf.f);
> > +
> > +    ff_cbs_fragment_free(&s->current_obu);
> > +    ff_cbs_close(&s->cbc);
> > +
> > +    return 0;
> > +}
> > +
> > +static av_cold int av1_decode_init(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int i;
> > +    int ret = 0;
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> > +        s->ref[i].tf.f = av_frame_alloc();
> > +        if (!s->ref[i].tf.f) {
> > +            av1_decode_free(avctx);
> > +            av_log(avctx, AV_LOG_ERROR,
> > +                   "Fail to allocate frame buffer %d.\n", i);
> > +            return AVERROR(ENOMEM);
> > +        }
> > +    }
> > +
> > +        s->cur_frame.tf.f = av_frame_alloc();
> > +        if (!s->cur_frame.tf.f) {
> > +            av1_decode_free(avctx);
> > +            av_log(avctx, AV_LOG_ERROR, "Fail to allocate frame buffer.\n");
> > +            return AVERROR(ENOMEM);
> > +        }
> 
> Wrong indentation.


Thanks, I will fix this in my next patch version.

> 
> > +
> > +    s->avctx = avctx;
> > +    s->pix_fmt = AV_PIX_FMT_NONE;
> > +
> > +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +    return 0;
> > +}
> > +
> > +static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f) {
> > +    int ret;
> > +    if ((ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF))
> < 0)
> > +        return ret;
> > +
> > +    if (avctx->hwaccel) {
> > +        const AVHWAccel *hwaccel = avctx->hwaccel;
> > +        if (hwaccel->frame_priv_data_size) {
> > +            f->hwaccel_priv_buf =
> > +                av_buffer_allocz(hwaccel->frame_priv_data_size);
> > +            if (!f->hwaccel_priv_buf)
> > +                goto fail;
> > +            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
> > +        }
> > +    }
> > +    return 0;
> > +
> > +fail:
> > +    av1_frame_unref(avctx, f);
> > +    return AVERROR(ENOMEM);
> > +}
> > +
> > +static int set_output_frame(AVFrame *srcframe, AVFrame *frame,
> > +AVPacket *pkt) {
> > +    int ret = 0;
> > +    if ((ret = av_frame_ref(frame, srcframe)) < 0) {
> > +        return ret;
> > +    }
> > +
> > +    frame->pts = pkt->pts;
> > +    frame->pkt_dts = pkt->dts;
> 
> Did you add this because you saw bogus timestamps filled by
> ff_thread_get_buffer? If so, could you try applying
> https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=2057 and removing
> this chunk, to see if it fixes it?
Will add FF_CODEC_CAP_SETS_PKT_DTS to caps_internal as you mentioned in
another mail thread. Thanks.

> 
> > +
> > +    return 0;
> > +}
> > +
> > +static int update_reference_list (AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    AV1RawFrameHeader *header= &s->raw_frame_header;
> > +    int i;
> > +    int ret = 0;
> > +
> > +    for (i = 0; i < 8; i++) {
> > +        if (header->frame_type == AV1_FRAME_KEY ||
> > +            (header->refresh_frame_flags & (1 << i))) {
> > +            if (s->ref[i].tf.f->buf[0])
> > +                av1_frame_unref(avctx, &s->ref[i]);
> > +            if ((ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame)) < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Ref frame error:%d.\n", ret);
> > +                return ret;
> > +            }
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int get_current_frame(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int ret = 0;
> > +
> > +    if (s->cur_frame.tf.f->buf[0])
> > +        av1_frame_unref(avctx, &s->cur_frame);
> > +
> > +    ret = av1_frame_alloc(avctx, &s->cur_frame);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Get current frame fail.\n");
> > +        return ret;
> > +    }
> > +
> > +    if (s->raw_frame_header.primary_ref_frame == AV1_PRIMARY_REF_NONE)
> > +        setup_past_independence(&s->cur_frame);
> > +    else
> > +        load_previous_and_update(s);
> > +
> > +    global_motion_params(s);
> > +
> > +    return ret;
> > +}
> > +
> > +static int decode_current_frame(AVCodecContext *avctx) {
> > +    int ret = 0;
> > +
> > +    if (avctx->hwaccel) {
> > +        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
> 
> You should pass this a buffer. Some hwaccel implementation may need it.

Thanks, I will modify this in my next patch version.

> 
> > +        if (ret < 0) {
> > +            av_log(avctx, AV_LOG_ERROR, "HW accel strat frame fail.\n");
> > +            return ret;
> > +        }
> > +
> > +        ret = avctx->hwaccel->end_frame(avctx);
> 
> Shouldn't hwaccel->decode_slice() be called before this, and not after?

Actually decode_slice() is called first, then start_frame() and end_frame().
The vaapi driver doesn't care about the order of the sequence info (filled in start_frame)
and the slice info (filled in decode_slice). But considering other HW codecs, I will change
it to the usual order: start_frame, decode_slice, then end_frame. Thanks.

> 
> > +        if (ret < 0) {
> > +            av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
> > +            return ret;
> > +        }
> > +    } else {
> > +        av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport
> hardware "
> > +               "acceleration AV1 decode.\n");
> > +        return -1;
> > +    }
> > +
> > +    ret = update_reference_list(avctx);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Fail to update reference list.\n");
> > +        return ret;
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int av1_decode_frame(AVCodecContext *avctx, void *frame,
> > +                            int *got_frame, AVPacket *pkt) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    AV1RawSequenceHeader* raw_seq = NULL;
> > +    AV1RawFrameHeader *raw_frame_header = NULL;
> > +    AV1RawTileGroup *raw_tile_group = NULL;
> > +    uint32_t tile_group_offset = 0;
> 
> Should be ptrdiff_t.

Thanks, I will fix this in my next patch version.

> 
> > +    int i;
> 
> Just do for (int i...) below.

OK, I will modify this. Thanks.

> 
> > +    int ret = 0;
> > +
> > +    ret = ff_cbs_read_packet(s->cbc, &s->current_obu, pkt);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Fail to read packet.\n");
> > +        goto end;
> > +    }
> > +    av_log(avctx, AV_LOG_DEBUG, "Total obu for this frame:%d.\n",
> > +           s->current_obu.nb_units);
> > +
> > +    for (i = 0; i < s->current_obu.nb_units; i++) {
> > +        CodedBitstreamUnit *unit = &s->current_obu.units[i];
> > +        AV1RawOBU *obu = unit->content;
> > +        av_log(avctx, AV_LOG_DEBUG, "Obu idx:%d, obu type:%d.\n", i,
> > + unit->type);
> > +
> > +        switch (unit->type) {
> > +        case AV1_OBU_SEQUENCE_HEADER:
> > +            raw_seq = &obu->obu.sequence_header;
> > +            memcpy(&s->raw_seq, raw_seq,
> > + sizeof(AV1RawSequenceHeader));
> 
> You can use
> (CodedBitstreamAV1Context*)(s->cbc->priv_data)->sequence_header
> for this instead. And if you'd rather not, you can always create a new reference
> to unit->content_ref and store it in AV1DecContext.

Creating a new reference will be better; I will store them in AV1DecContext. VAAPI
needs both the sequence header and the frame header in hwaccel->start_frame. 

> 
> Also, consider adding a avctx->hwaccel->decode_params() call here as well, in
> case some future hwaccel decides/requires to implement it.

OK, will add it in my next patch version. Thanks.

> 
> > +            if (s->pix_fmt == AV_PIX_FMT_NONE) {
> > +                ret = get_pixel_format(avctx);
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_ERROR,
> > +                           "Fail to get format from sequence header.\n");
> > +                    goto end;
> > +                }
> > +            }
> > +            break;
> > +        case AV1_OBU_FRAME_HEADER:
> > +        case AV1_OBU_REDUNDANT_FRAME_HEADER:
> > +            raw_frame_header = &obu->obu.frame_header;
> > +            memcpy (&s->raw_frame_header,
> > +                    raw_frame_header,
> > +                    sizeof(AV1RawFrameHeader));
> 
> Same as above, you can create a new reference to unit->content_ref instead.
> memcpy should be avoided as much as possible, especially in this case which is
> run on every frame.

Will modify this, Thanks.

> 
> > +            s->tile_num =
> > +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> > +            if (raw_frame_header->show_existing_frame) {
> > +                if (set_output_frame(s->ref[raw_frame_header-
> >frame_to_show_map_idx].tf.f,
> > +                                     frame, pkt) < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +                *got_frame = 1;
> > +                goto end;
> > +            } else {
> > +                ret = get_current_frame(avctx);
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +            }
> > +            break;
> > +        case AV1_OBU_FRAME:
> > +            raw_frame_header = &obu->obu.frame.header;
> > +            raw_tile_group = &obu->obu.frame.tile_group;
> > +            memcpy(&s->raw_frame_header,
> > +                   raw_frame_header,
> > +                   sizeof(AV1RawFrameHeader));
> 
> Ditto.

Will modify this, Thanks.

> 
> > +            s->tile_num =
> > +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> > +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> > +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> > +
> > +            ret = get_current_frame(avctx);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> > +                       ret);
> > +                goto end;
> > +            }
> > +            if (avctx->hwaccel) {
> > +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data,
> > + pkt->size);
> 
> Can't you pass raw_tile_group->tile_data.data or something less broad than the
> entire packet?

Will try to modify this in my next patch version.

> 
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_ERROR,
> > +                           "HW accel decode slice fail.\n");
> > +                    return ret;
> > +                }
> > +            }
> > +            ret = decode_current_frame(avctx);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> > +                       ret);
> > +                goto end;
> > +            }
> > +            if (raw_frame_header->show_frame) {
> > +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +
> > +                *got_frame = 1;
> > +            }
> > +            break;
> > +        case AV1_OBU_TILE_GROUP:
> 
> You can merge this and the frame/frame_header cases above, letting it fall
> through at the point it would start reading the AV1_OBU_TILE_GROUP bits in
> the former case.

Will try to modify this. Thanks.

> 
> > +            raw_tile_group = &obu->obu.tile_group;
> > +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> > +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> > +
> > +            if (avctx->hwaccel) {
> > +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_ERROR,
> > +                           "HW accel decode slice fail.\n");
> > +                    return ret;
> > +                }
> > +            }
> > +            if (s->tile_num != raw_tile_group->tg_end + 1)
> > +                break;
> > +            ret = decode_current_frame(avctx);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> > +                       ret);
> > +                goto end;
> > +            }
> > +
> > +            if (raw_frame_header->show_frame) {
> > +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +
> > +                *got_frame = 1;
> > +            }
> > +            break;
> > +        case AV1_OBU_TILE_LIST:
> > +        case AV1_OBU_TEMPORAL_DELIMITER:
> > +        case AV1_OBU_PADDING:
> > +        case AV1_OBU_METADATA:
> > +            break;
> > +        default:
> > +            av_log(avctx, AV_LOG_DEBUG,
> > +                   "Un-implemented obu type: %d (%zd bits).\n",
> 
> Unknown, rather than unimplemented.

Will try to modify this in my next patch version. Thanks.

> 
> > +                   unit->type, unit->data_size);
> > +        }
> > +    }
> > +
> > +end:
> > +    ff_cbs_fragment_reset(&s->current_obu);
> > +    return ret;
> > +}
> > +
> > +static void av1_decode_flush(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int i;
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++)
> > +        av1_frame_unref(avctx, &s->ref[i]);
> > +
> > +    av1_frame_unref(avctx, &s->cur_frame); }
> > +
> > +AVCodec ff_av1_decoder = {
> > +    .name                  = "av1",
> > +    .long_name             = NULL_IF_CONFIG_SMALL("Hardware Acceleration
> AV1"),
> 
> "Alliance for Open Media AV1" or something like that. The fact the decoder is
> currently hwaccel backed only is irrelevant to its name.

Actually I want to make it obvious to users that this av1 decoder is hwaccel-only,
e.g. when running a command like "ffmpeg --help decoder=av1". If you think we can ignore
this, I will change it. Thanks.

> 
> > +    .type                  = AVMEDIA_TYPE_VIDEO,
> > +    .id                    = AV_CODEC_ID_AV1,
> > +    .priv_data_size        = sizeof(AV1DecContext),
> > +    .init                  = av1_decode_init,
> > +    .close                 = av1_decode_free,
> > +    .decode                = av1_decode_frame,
> > +    .capabilities          = AV_CODEC_CAP_DR1,
> 
> Probably should add caps_internal like FF_CODEC_CAP_INIT_THREADSAFE and
> FF_CODEC_CAP_INIT_CLEANUP.

Will add it in my next patch version. Thanks.

> 
> > +    .flush                 = av1_decode_flush,
> > +    .profiles              = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
> > +    .hw_configs            = (const AVCodecHWConfigInternal * []) {
> > +        NULL
> > +    },
> > +};
> > diff --git a/libavcodec/av1dec.h b/libavcodec/av1dec.h new file mode
> > 100644 index 0000000000..5cb0e896b7
> > --- /dev/null
> > +++ b/libavcodec/av1dec.h
> > @@ -0,0 +1,89 @@
> > +/*
> > + * AV1 video decoder
> > + * *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > +02110-1301 USA  */
> > +
> > +#ifndef AVCODEC_AV1DEC_H
> > +#define AVCODEC_AV1DEC_H
> > +
> > +#include <stdatomic.h>
> > +#include "libavutil/buffer.h"
> > +#include "libavutil/md5.h"
> > +#include "avcodec.h"
> > +#include "bswapdsp.h"
> > +#include "get_bits.h"
> > +#include "av1_parse.h"
> > +#include "libavformat/avformat.h"
> > +#include "libavformat/av1.h"
> > +#include "thread.h"
> > +#include "cbs.h"
> > +#include "cbs_av1.h"
> > +
> > +typedef enum AV1FrameRestorationType {
> > +    AV1_RESTORE_NONE,
> > +    AV1_RESTORE_WIENER,
> > +    AV1_RESTORE_SGRPROJ,
> > +    AV1_RESTORE_SWITCHABLE,
> > +} AV1FrameRestorationType;
> 
> Should be added to av1.h instead.
> 
> > +
> > +typedef struct AV1Frame {
> > +    ThreadFrame tf;
> > +
> > +    AVBufferRef *hwaccel_priv_buf;
> > +    void *hwaccel_picture_private;
> > +
> > +    uint8_t loop_filter_delta_enabled;
> > +    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
> > +    int8_t  loop_filter_mode_deltas[2];
> > +    uint8_t gm_type[AV1_TOTAL_REFS_PER_FRAME];
> > +    int32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
> > +} AV1Frame;
> > +
> > +typedef struct TileGroupInfo {
> > +    uint32_t tile_group_offset;
> > +    uint32_t tile_offset;
> > +    uint32_t tile_size;
> > +    uint16_t tile_row;
> > +    uint16_t tile_column;
> > +    uint16_t tile_width_sbs;
> > +    uint16_t tile_height_sbs;
> > +    uint32_t tile_size_bytes;
> > +} TileGroupInfo;
> > +
> > +typedef struct AV1DecContext {
> > +    const AVClass *class;
> > +    AVCodecContext *avctx;
> > +
> > +    enum AVPixelFormat pix_fmt;
> > +    CodedBitstreamContext *cbc;
> > +    CodedBitstreamFragment current_obu;
> > +
> > +    AV1RawSequenceHeader raw_seq;
> > +    AV1RawFrameHeader raw_frame_header;
> > +    AV1RawTileGroup raw_tile_group;
> > +    TileGroupInfo tile_group_info[128];
> > +    uint16_t tile_num;
> > +    uint16_t tg_start;
> > +    uint16_t tg_end;
> > +
> > +    AV1Frame ref[8];
> > +    AV1Frame cur_frame;
> > +
> > +} AV1DecContext;
> > +
> > +#endif /* AVCODEC_AV1DEC_H */
> > diff --git a/libavcodec/version.h b/libavcodec/version.h index
> > a3f9f828ee..5bdfdce363 100644
> > --- a/libavcodec/version.h
> > +++ b/libavcodec/version.h
> > @@ -28,7 +28,7 @@
> >  #include "libavutil/version.h"
> >
> >  #define LIBAVCODEC_VERSION_MAJOR  58
> > -#define LIBAVCODEC_VERSION_MINOR 100
> > +#define LIBAVCODEC_VERSION_MINOR 101
> >  #define LIBAVCODEC_VERSION_MICRO 100
> >
> >  #define LIBAVCODEC_VERSION_INT
> > AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> >
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org
> with subject "unsubscribe".
Fei Wang Aug. 24, 2020, 8:39 a.m. UTC | #12
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Mark
> Thompson
> Sent: Sunday, August 23, 2020 6:01 AM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH 1/2] avcodec: add av1 hardware
> acceleration decoder
> 
> On 21/08/2020 06:29, Fei Wang wrote:
> > This av1 decoder is now only used for av1 hardware acceleration
> > decoder. Consider it can be extend to a local decoder like hevc or vp9
> > in the future, so define its name as "av1" and put it into external
> > libraries codec list.
> >
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> >   Changelog              |   1 +
> >   configure              |   1 +
> >   libavcodec/Makefile    |   1 +
> >   libavcodec/allcodecs.c |   1 +
> >   libavcodec/av1dec.c    | 746
> +++++++++++++++++++++++++++++++++++++++++
> >   libavcodec/av1dec.h    |  89 +++++
> >   libavcodec/version.h   |   2 +-
> >   7 files changed, 840 insertions(+), 1 deletion(-)
> >   create mode 100644 libavcodec/av1dec.c
> >   create mode 100644 libavcodec/av1dec.h
> >
> > diff --git a/Changelog b/Changelog
> > index 1efc768387..3ff88cc12f 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -14,6 +14,7 @@ version <next>:
> >   - ADPCM Argonaut Games encoder
> >   - Argonaut Games ASF muxer
> >   - AV1 Low overhead bitstream format demuxer
> > +- AV1 decoder (Hardware acceleration used only)
> >
> >
> >   version 4.3:
> > diff --git a/configure b/configure
> > index 6faff9bc7b..ef71e47c4e 100755
> > --- a/configure
> > +++ b/configure
> > @@ -2685,6 +2685,7 @@ atrac3al_decoder_select="mdct"
> >   atrac3p_decoder_select="mdct sinewin"
> >   atrac3pal_decoder_select="mdct sinewin"
> >   atrac9_decoder_select="mdct"
> > +av1_decoder_select="cbs_av1"
> >   avrn_decoder_select="exif jpegtables"
> >   bink_decoder_select="blockdsp hpeldsp"
> >   binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile index
> > 3431ba2dca..2371039bd2 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -764,6 +764,7 @@ OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
> >   OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
> >   OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
> >   OBJS-$(CONFIG_ZMBV_ENCODER)            += zmbvenc.o
> > +OBJS-$(CONFIG_AV1_DECODER)             += av1dec.o
> >
> >   # (AD)PCM decoders/encoders
> >   OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
> > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index
> > 4bd830e5d0..00799d3431 100644
> > --- a/libavcodec/allcodecs.c
> > +++ b/libavcodec/allcodecs.c
> > @@ -761,6 +761,7 @@ extern AVCodec ff_idf_decoder;
> >    * above is available */
> >   extern AVCodec ff_aac_mf_encoder;
> >   extern AVCodec ff_ac3_mf_encoder;
> > +extern AVCodec ff_av1_decoder;
> >   extern AVCodec ff_h263_v4l2m2m_encoder;
> >   extern AVCodec ff_libaom_av1_decoder;
> >   extern AVCodec ff_libopenh264_encoder; diff --git
> > a/libavcodec/av1dec.c b/libavcodec/av1dec.c new file mode 100644 index
> > 0000000000..ab88ed987a
> > --- /dev/null
> > +++ b/libavcodec/av1dec.c
> > @@ -0,0 +1,746 @@
> > +/*
> > + * AV1video decoder
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > +02110-1301 USA  */
> > +
> > +#include "avcodec.h"
> > +#include "get_bits.h"
> > +#include "hwconfig.h"
> > +#include "internal.h"
> > +#include "profiles.h"
> > +#include "thread.h"
> > +#include "videodsp.h"
> > +#include "libavutil/avassert.h"
> > +#include "libavutil/pixdesc.h"
> > +
> > +#include "av1dec.h"
> > +#include "libavformat/av1.h"
> > +
> > +static void setup_past_independence(AV1Frame *f) {
> > +    f->loop_filter_delta_enabled = 1;
> > +
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_INTRA] = 1;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST2] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST3] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN] = -1;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF] = 0;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2] = -1;
> > +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF] = -1;
> > +
> > +    f->loop_filter_mode_deltas[0] = 0;
> > +    f->loop_filter_mode_deltas[1] = 0; }
> > +
> > +static void load_previous_and_update(AV1DecContext *s) {
> > +    int i;
> > +
> > +    memcpy(s->cur_frame.loop_filter_ref_deltas,
> > +           s->ref[s-
> >raw_frame_header.primary_ref_frame].loop_filter_ref_deltas,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> > +    memcpy(s->cur_frame.loop_filter_mode_deltas,
> > +           s->ref[s-
> >raw_frame_header.primary_ref_frame].loop_filter_mode_deltas,
> > +           2 * sizeof(int8_t));
> > +
> > +    if (s->raw_frame_header.loop_filter_delta_update) {
> > +        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
> > +            if (s->raw_frame_header.update_ref_delta[i])
> > +                s->cur_frame.loop_filter_ref_deltas[i] =
> > +                    s->raw_frame_header.loop_filter_ref_deltas[i];
> > +        }
> > +
> > +        for (i = 0; i < 2; i++) {
> > +            if (s->raw_frame_header.update_mode_delta[i])
> > +                s->cur_frame.loop_filter_mode_deltas[i] =
> > +                    s->raw_frame_header.loop_filter_mode_deltas[i];
> > +        }
> > +    }
> > +
> > +    s->cur_frame.loop_filter_delta_enabled =
> > +        s->raw_frame_header.loop_filter_delta_enabled;
> > +}
> > +
> > +static uint32_t inverse_recenter(int r, uint32_t v) {
> > +    if (v > 2 * r)
> > +        return v;
> > +    else if (v & 1)
> > +        return r - ((v + 1) >> 1);
> > +    else
> > +        return r + (v >> 1);
> > +}
> > +
> > +static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
> > +                                                int mx, int r) {
> > +    if ((r << 1) <= mx) {
> > +        return inverse_recenter(r, sub_exp);
> > +    } else {
> > +        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
> > +    }
> > +}
> > +
> > +static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
> > +                                             int high, int r) {
> > +    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r - low);
> > +    return x + low;
> > +}
> > +
> > +static void read_global_param(AV1DecContext *s, int type, int ref,
> > +int idx) {
> > +    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx, r;
> > +    abs_bits = AV1_GM_ABS_ALPHA_BITS;
> > +    prec_bits = AV1_GM_ALPHA_PREC_BITS;
> > +
> > +    if (idx < 2) {
> > +        if (type == AV1_WARP_MODEL_TRANSLATION) {
> > +            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
> > +                !s->raw_frame_header.allow_high_precision_mv;
> > +            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
> > +                !s->raw_frame_header.allow_high_precision_mv;
> > +        } else {
> > +            abs_bits = AV1_GM_ABS_TRANS_BITS;
> > +            prec_bits = AV1_GM_TRANS_PREC_BITS;
> > +        }
> > +    }
> > +    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
> > +    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
> > +    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
> > +    mx = 1 << abs_bits;
> > +    r = (s->ref[s->raw_frame_header.primary_ref_frame].gm_params[ref][idx]
> > +         >> prec_diff) - sub;
> > +
> > +    s->cur_frame.gm_params[ref][idx] =
> > +        (decode_signed_subexp_with_ref(s-
> >raw_frame_header.gm_params[ref][idx],
> > +                                       -mx, mx + 1, r) << prec_diff)
> > ++ round; }
> > +
> > +/**
> > +* update gm type/params, since cbs already implemented part of this
> > +funcation,
> > +* so we don't need to full implement spec.
> > +*/
> > +static void global_motion_params(AV1DecContext *s) {
> > +    const AV1RawFrameHeader *header = &s->raw_frame_header;
> > +    int type;
> > +    int i, j;
> > +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> > +        s->cur_frame.gm_type[i] = AV1_WARP_MODEL_IDENTITY;
> > +        for (j = 0; j <= 5; j++)
> > +            s->cur_frame.gm_params[i][j] = (j % 3 == 2) ?
> > +                                           1 << AV1_WARPEDMODEL_PREC_BITS : 0;
> > +    }
> > +    if (header->frame_type == AV1_FRAME_KEY ||
> > +        header->frame_type == AV1_FRAME_INTRA_ONLY)
> > +        return;
> > +
> > +    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> > +        if (header->is_global[i]) {
> > +            if (header->is_rot_zoom[i]) {
> > +                type = AV1_WARP_MODEL_ROTZOOM;
> > +            } else {
> > +                type = header->is_translation[i] ?
> AV1_WARP_MODEL_TRANSLATION
> > +                                                 : AV1_WARP_MODEL_AFFINE;
> > +            }
> > +        } else {
> > +            type = AV1_WARP_MODEL_IDENTITY;
> > +        }
> > +        s->cur_frame.gm_type[i] = type;
> > +
> > +        if (type >= AV1_WARP_MODEL_ROTZOOM) {
> > +            read_global_param(s, type, i, 2);
> > +            read_global_param(s, type, i, 3);
> > +            if (type == AV1_WARP_MODEL_AFFINE) {
> > +                read_global_param(s, type, i, 4);
> > +                read_global_param(s, type, i, 5);
> > +            } else {
> > +                s->cur_frame.gm_params[i][4] = - s->cur_frame.gm_params[i][3];
> > +                s->cur_frame.gm_params[i][5] = - s->cur_frame.gm_params[i][2];
> > +            }
> > +        }
> > +        if (type >= AV1_WARP_MODEL_TRANSLATION) {
> > +            read_global_param(s, type, i, 0);
> > +            read_global_param(s, type, i, 1);
> > +        }
> > +    }
> > +}
> > +
> > +static int get_tiles_info(AVCodecContext *avctx,
> > +                          AV1RawTileGroup *tile_group,
> > +                          uint32_t tile_group_offset) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    GetBitContext gb;
> > +    uint16_t tile_num, tile_row, tile_col;
> > +    uint16_t mi_cols, mi_rows, sb_cols, sb_rows, tile_width_sb, tile_height_sb;
> > +    uint32_t size = 0, size_bytes = 0, offset = 0;
> > +    int ret = 0;
> > +
> > +    if ((ret = init_get_bits8(&gb,
> > +                              tile_group->tile_data.data,
> > +                              tile_group->tile_data.data_size)) < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Fail to initialize bitstream reader.\n");
> > +        return ret;
> > +    }
> > +
> > +    s->tg_start = tile_group->tg_start;
> > +    s->tg_end = tile_group->tg_end;
> > +
> > +    if (s->raw_frame_header.uniform_tile_spacing_flag) {
> > +        mi_cols = 2 * ((s->raw_seq.max_frame_width_minus_1 + 8) >> 3);
> > +        mi_rows = 2 * ((s->raw_seq.max_frame_height_minus_1 + 8) >> 3);
> > +        sb_cols = s->raw_seq.use_128x128_superblock ? ((mi_cols + 31) >> 5)
> > +                                                    : ((mi_cols + 15) >> 4);
> > +        sb_rows = s->raw_seq.use_128x128_superblock ? ((mi_rows + 31) >> 5)
> > +                                                    : ((mi_rows + 15) >> 4);
> > +        tile_width_sb = (sb_cols + (1 << s->raw_frame_header.tile_cols_log2)
> > +                         -1) >> s->raw_frame_header.tile_cols_log2;
> > +        tile_height_sb = (sb_rows + (1 << s->raw_frame_header.tile_rows_log2)
> > +                         -1) >> s->raw_frame_header.tile_rows_log2;
> 
> Maybe cbs read should always fill the width/height_in_sbs arrays so that all the
> special casing for uniform_spacing_flag in this function isn't needed?  It would
> be trivial to add it there.
@James Almer, what do you think of this? Currently, cbs_av1 reads
width/height_in_sbs_minus_1 directly from the bitstream. But if uniform_spacing_flag is 1,
the two values may need some calculation like what is done here.

> 
> > +    }
> > +
> > +    for (tile_num = tile_group->tg_start; tile_num <= tile_group->tg_end;
> tile_num++) {
> > +        tile_row = tile_num / s->raw_frame_header.tile_cols;
> > +        tile_col = tile_num % s->raw_frame_header.tile_cols;
> > +
> > +        if (tile_num == tile_group->tg_end) {
> > +            s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> > +            s->tile_group_info[tile_num].tile_size = get_bits_left(&gb) / 8;
> > +            s->tile_group_info[tile_num].tile_offset = offset;
> > +            s->tile_group_info[tile_num].tile_row = tile_row;
> > +            s->tile_group_info[tile_num].tile_column = tile_col;
> > +            s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> > +            if (s->raw_frame_header.uniform_tile_spacing_flag) {
> > +                s->tile_group_info[tile_num].tile_width_sbs =
> > +                    (tile_col + 1) == s->raw_frame_header.tile_cols ?
> > +                    sb_cols - tile_col * tile_width_sb :
> > +                    tile_width_sb;
> > +                s->tile_group_info[tile_num].tile_height_sbs =
> > +                    (tile_row + 1) == s->raw_frame_header.tile_rows ?
> > +                    sb_rows - tile_row * tile_height_sb :
> > +                    tile_height_sb;
> > +            } else {
> > +                s->tile_group_info[tile_num].tile_width_sbs =
> > +                    s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> > +                s->tile_group_info[tile_num].tile_height_sbs =
> > +                    s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> > +            }
> > +            return 0;
> > +        }
> > +        size_bytes = s->raw_frame_header.tile_size_bytes_minus1 + 1;
> > +        size = get_bits_le(&gb, size_bytes * 8) + 1;
> > +        skip_bits(&gb, size * 8);
> > +
> > +        offset += size_bytes;
> > +
> > +        s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
> > +        s->tile_group_info[tile_num].tile_size = size;
> > +        s->tile_group_info[tile_num].tile_offset = offset;
> > +        s->tile_group_info[tile_num].tile_row = tile_row;
> > +        s->tile_group_info[tile_num].tile_column = tile_col;
> > +        s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
> 
> This would be nicer as a compound literal

Will modify it. Thanks.

> 
> > +        if (s->raw_frame_header.uniform_tile_spacing_flag) {
> > +            s->tile_group_info[tile_num].tile_width_sbs =
> > +                (tile_col + 1) == s->raw_frame_header.tile_cols ?
> > +                sb_cols - tile_col * tile_width_sb :
> > +                tile_width_sb;
> > +            s->tile_group_info[tile_num].tile_height_sbs =
> > +                (tile_row + 1) == s->raw_frame_header.tile_rows ?
> > +                sb_rows - tile_row * tile_height_sb :
> > +                tile_height_sb;
> > +        } else {
> > +            s->tile_group_info[tile_num].tile_width_sbs =
> > +                s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
> > +            s->tile_group_info[tile_num].tile_height_sbs =
> > +                s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
> > +        }
> > +        offset += size;
> > +    }
> > +
> > +    return 0;
> > +
> > +}
> > +
> > +static int get_pixel_format(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    const AV1RawSequenceHeader *seq = &s->raw_seq;
> > +    uint8_t bit_depth;
> > +    int ret = 0;
> > +    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; #define
> HWACCEL_MAX
> > +(0)
> > +    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
> > +
> > +    if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
> > +        bit_depth = seq->color_config.twelve_bit ? 12 : 10;
> > +    else if (seq->seq_profile <= 2)
> > +        bit_depth = seq->color_config.high_bitdepth ? 10 : 8;
> > +    else {
> > +        av_log(avctx, AV_LOG_ERROR,
> > +               "Unknow av1 profile %d.\n", seq->seq_profile);
> > +        return -1;
> > +    }
> > +
> > +    if (!seq->color_config.mono_chrome) {
> > +        // 4:4:4 x:0 y:0, 4:2:2 x:1 y:0, 4:2:0 x:1 y:1
> > +        if (seq->color_config.subsampling_x == 0 &&
> > +            seq->color_config.subsampling_y == 0) {
> > +            if (bit_depth == 8)
> > +                pix_fmt = AV_PIX_FMT_YUV444P;
> > +            else if (bit_depth == 10)
> > +                pix_fmt = AV_PIX_FMT_YUV444P10;
> > +            else if (bit_depth == 12)
> > +                pix_fmt = AV_PIX_FMT_YUV444P12;
> > +            else
> > +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +        } else if (seq->color_config.subsampling_x == 1 &&
> > +                   seq->color_config.subsampling_y == 0) {
> > +            if (bit_depth == 8)
> > +                pix_fmt = AV_PIX_FMT_YUV422P;
> > +            else if (bit_depth == 10)
> > +                pix_fmt = AV_PIX_FMT_YUV422P10;
> > +            else if (bit_depth == 12)
> > +                pix_fmt = AV_PIX_FMT_YUV422P12;
> > +            else
> > +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +        } else if (seq->color_config.subsampling_x == 1 &&
> > +                   seq->color_config.subsampling_y == 1) {
> > +            if (bit_depth == 8)
> > +                pix_fmt = AV_PIX_FMT_YUV420P;
> > +            else if (bit_depth == 10)
> > +                pix_fmt = AV_PIX_FMT_YUV420P10;
> > +            else if (bit_depth == 12)
> > +                pix_fmt = AV_PIX_FMT_YUV420P12;
> > +            else
> > +                av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +        }
> > +    } else {
> > +        if (seq->color_config.subsampling_x == 1 &&
> > +            seq->color_config.subsampling_y == 1)
> > +            pix_fmt = AV_PIX_FMT_YUV440P;
> > +        else
> > +            av_log(avctx, AV_LOG_WARNING, "Unknow av1 pixel format.\n");
> > +    }
> > +
> > +    av_log(avctx, AV_LOG_DEBUG, "Av1 decode get format: %s.\n",
> > +           av_get_pix_fmt_name(pix_fmt));
> > +
> > +    if (pix_fmt == AV_PIX_FMT_NONE)
> > +        return -1;
> > +    s->pix_fmt = pix_fmt;
> > +
> > +    *fmtp++ = s->pix_fmt;
> > +    *fmtp = AV_PIX_FMT_NONE;
> > +    ret = ff_thread_get_format(avctx, pix_fmts);
> 
> Passing the software format here is bad, because it makes it appear to the user
> as though it can be decoded.  I think you would be better off setting avctx-
> >sw_pix_fmt to the software format and then only passing the available
> hardware formats to ff_get_format().
Yes, I checked vaapi_decode_find_best_format; it uses avctx->sw_pix_fmt as
the reference format to find the best matching format. Will modify it.

> 
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    avctx->pix_fmt = ret;
> 
> (ff_get_format() does make sure the returned value is in the list, which means if
> the unusable software format is in the list then it will be happy to return it.)
> 
> > +
> > +    return 0;
> > +}
> > +
> > +static void av1_frame_unref(AVCodecContext *avctx, AV1Frame *f) {
> > +    ff_thread_release_buffer(avctx, &f->tf);
> > +    av_buffer_unref(&f->hwaccel_priv_buf);
> > +    f->hwaccel_picture_private = NULL; }
> > +
> > +static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst,
> > +AV1Frame *src) {
> > +    int ret = 0;
> > +
> > +    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    if (src->hwaccel_picture_private) {
> > +        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
> > +        if (!dst->hwaccel_priv_buf)
> > +            goto fail;
> > +        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
> > +    }
> > +
> > +    dst->loop_filter_delta_enabled = src->loop_filter_delta_enabled;
> > +    memcpy(dst->loop_filter_ref_deltas,
> > +           src->loop_filter_ref_deltas,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> > +    memcpy(dst->loop_filter_mode_deltas,
> > +           src->loop_filter_mode_deltas,
> > +           2 * sizeof(int8_t));
> > +    memcpy(dst->gm_type,
> > +           src->gm_type,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(uint8_t));
> > +    memcpy(dst->gm_params,
> > +           src->gm_params,
> > +           AV1_TOTAL_REFS_PER_FRAME * 6 * sizeof(int8_t));
> > +
> > +    return 0;
> > +
> > +fail:
> > +    av1_frame_unref(avctx, dst);
> > +    return AVERROR(ENOMEM);
> > +}
> > +
> > +static av_cold int av1_decode_free(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int i;
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> > +        if (s->ref[i].tf.f->buf[0])
> > +            av1_frame_unref(avctx, &s->ref[i]);
> > +        av_frame_free(&s->ref[i].tf.f);
> > +    }
> > +    if (s->cur_frame.tf.f->buf[0])
> > +        av1_frame_unref(avctx, &s->cur_frame);
> > +    av_frame_free(&s->cur_frame.tf.f);
> > +
> > +    ff_cbs_fragment_free(&s->current_obu);
> > +    ff_cbs_close(&s->cbc);
> > +
> > +    return 0;
> > +}
> > +
> > +static av_cold int av1_decode_init(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int i;
> > +    int ret = 0;
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
> > +        s->ref[i].tf.f = av_frame_alloc();
> > +        if (!s->ref[i].tf.f) {
> > +            av1_decode_free(avctx);
> > +            av_log(avctx, AV_LOG_ERROR,
> > +                   "Fail to allocate frame buffer %d.\n", i);
> 
> In this message and others below you have written "Fail to X" where I think you
> should probably write "Failed to X".
OK, will modify this.

> 
> > +            return AVERROR(ENOMEM);
> > +        }
> > +    }
> > +
> > +        s->cur_frame.tf.f = av_frame_alloc();
> > +        if (!s->cur_frame.tf.f) {
> > +            av1_decode_free(avctx);
> > +            av_log(avctx, AV_LOG_ERROR, "Fail to allocate frame buffer.\n");
> > +            return AVERROR(ENOMEM);
> > +        }
> 
> Alignment is off.
Will fix this. Thanks.

> 
> > +
> > +    s->avctx = avctx;
> > +    s->pix_fmt = AV_PIX_FMT_NONE;
> > +
> > +    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +    return 0;
> > +}
> > +
> > +static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f) {
> > +    int ret;
> > +    if ((ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF))
> < 0)
> > +        return ret;
> > +
> > +    if (avctx->hwaccel) {
> > +        const AVHWAccel *hwaccel = avctx->hwaccel;
> > +        if (hwaccel->frame_priv_data_size) {
> > +            f->hwaccel_priv_buf =
> > +                av_buffer_allocz(hwaccel->frame_priv_data_size);
> > +            if (!f->hwaccel_priv_buf)
> > +                goto fail;
> > +            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
> > +        }
> > +    }
> > +    return 0;
> > +
> > +fail:
> > +    av1_frame_unref(avctx, f);
> > +    return AVERROR(ENOMEM);
> > +}
> > +
> > +static int set_output_frame(AVFrame *srcframe, AVFrame *frame,
> > +AVPacket *pkt) {
> > +    int ret = 0;
> > +    if ((ret = av_frame_ref(frame, srcframe)) < 0) {
> > +        return ret;
> > +    }
> > +
> > +    frame->pts = pkt->pts;
> > +    frame->pkt_dts = pkt->dts;
> > +
> > +    return 0;
> > +}
> > +
> > +static int update_reference_list (AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    AV1RawFrameHeader *header= &s->raw_frame_header;
> > +    int i;
> > +    int ret = 0;
> > +
> > +    for (i = 0; i < 8; i++) {
> > +        if (header->frame_type == AV1_FRAME_KEY ||
> > +            (header->refresh_frame_flags & (1 << i))) {
> > +            if (s->ref[i].tf.f->buf[0])
> > +                av1_frame_unref(avctx, &s->ref[i]);
> > +            if ((ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame)) < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Ref frame error:%d.\n", ret);
> > +                return ret;
> > +            }
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int get_current_frame(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int ret = 0;
> > +
> > +    if (s->cur_frame.tf.f->buf[0])
> > +        av1_frame_unref(avctx, &s->cur_frame);
> > +
> > +    ret = av1_frame_alloc(avctx, &s->cur_frame);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Get current frame fail.\n");
> 
> This would be an allocation failure?  Maybe "Failed to allocate space for current
> frame", or something like that.
OK, will modify it. Thanks.

> 
> > +        return ret;
> > +    }
> > +
> > +    if (s->raw_frame_header.primary_ref_frame == AV1_PRIMARY_REF_NONE)
> > +        setup_past_independence(&s->cur_frame);
> > +    else
> > +        load_previous_and_update(s);
> > +
> > +    global_motion_params(s);
> > +
> > +    return ret;
> > +}
> > +
> > +static int decode_current_frame(AVCodecContext *avctx) {
> > +    int ret = 0;
> > +
> > +    if (avctx->hwaccel) {
> > +        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
> > +        if (ret < 0) {
> > +            av_log(avctx, AV_LOG_ERROR, "HW accel strat frame
> > + fail.\n");
> 
> Typo "strat".
Thanks, will fix this.

> 
> > +            return ret;
> > +        }
> 
> start_frame() is in the wrong place here?  All of the calls to decode_slice() for a
> frame should be between the matching start_frame() and end_frame().  (I guess
> this works for VAAPI because it doesn't mind what order the parameter buffers
> go in, but it may matter for others.)
Actually decode_slice is called first; VAAPI doesn't care about the order of
start_frame/decode_slice. But considering other HW codecs, I will modify it to the
general order: start_frame, decode_slice, end_frame.

Thanks
> 
> > +
> > +        ret = avctx->hwaccel->end_frame(avctx);
> > +        if (ret < 0) {
> > +            av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
> > +            return ret;
> > +        }
> > +    } else {
> > +        av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport
> hardware "
> > +               "acceleration AV1 decode.\n");
> 
> I think you can assert on this one, because something must have gone very
> wrong to end up here.
Will modify this, thanks.

> 
> > +        return -1;
> > +    }
> > +
> > +    ret = update_reference_list(avctx);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Fail to update reference list.\n");
> > +        return ret;
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int av1_decode_frame(AVCodecContext *avctx, void *frame,
> > +                            int *got_frame, AVPacket *pkt) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    AV1RawSequenceHeader* raw_seq = NULL;
> > +    AV1RawFrameHeader *raw_frame_header = NULL;
> > +    AV1RawTileGroup *raw_tile_group = NULL;
> > +    uint32_t tile_group_offset = 0;
> > +    int i;
> > +    int ret = 0;
> > +
> > +    ret = ff_cbs_read_packet(s->cbc, &s->current_obu, pkt);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Fail to read packet.\n");
> > +        goto end;
> > +    }
> > +    av_log(avctx, AV_LOG_DEBUG, "Total obu for this frame:%d.\n",
> > +           s->current_obu.nb_units);
> > +
> > +    for (i = 0; i < s->current_obu.nb_units; i++) {
> > +        CodedBitstreamUnit *unit = &s->current_obu.units[i];
> > +        AV1RawOBU *obu = unit->content;
> > +        av_log(avctx, AV_LOG_DEBUG, "Obu idx:%d, obu type:%d.\n", i,
> > + unit->type);
> > +
> > +        switch (unit->type) {
> > +        case AV1_OBU_SEQUENCE_HEADER:
> > +            raw_seq = &obu->obu.sequence_header;
> > +            memcpy(&s->raw_seq, raw_seq, sizeof(AV1RawSequenceHeader));
> > +            if (s->pix_fmt == AV_PIX_FMT_NONE) {
> > +                ret = get_pixel_format(avctx);
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_ERROR,
> > +                           "Fail to get format from sequence
> > + header.\n");
> 
> That message isn't quite right - the more likely failure here is that the hardware
> setup didn't work.
> 
> I think you want to make sure that all of the messages generated if someone
> tries to use this decoder without appropriate hardware are helpful.  (In both the
> "no API support" and the "has API support but the hardware isn't capable" cases.)
Will change the message here to "Failed to get pixel format", and also add another
check of avctx->hwaccel inside get_pixel_format that shows a message saying the
hardware doesn't support AV1 decode if avctx->hwaccel == NULL.
 
> 
> > +                    goto end;
> > +                }
> > +            }
> > +            break;
> > +        case AV1_OBU_FRAME_HEADER:
> > +        case AV1_OBU_REDUNDANT_FRAME_HEADER:
> > +            raw_frame_header = &obu->obu.frame_header;
> > +            memcpy (&s->raw_frame_header,
> > +                    raw_frame_header,
> > +                    sizeof(AV1RawFrameHeader));
> > +            s->tile_num =
> > +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> > +            if (raw_frame_header->show_existing_frame) {
> > +                if (set_output_frame(s->ref[raw_frame_header-
> >frame_to_show_map_idx].tf.f,
> > +                                     frame, pkt) < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +                *got_frame = 1;
> > +                goto end;
> > +            } else {
> > +                ret = get_current_frame(avctx);
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +            }
> > +            break;
> > +        case AV1_OBU_FRAME:
> > +            raw_frame_header = &obu->obu.frame.header;
> > +            raw_tile_group = &obu->obu.frame.tile_group;
> > +            memcpy(&s->raw_frame_header,
> > +                   raw_frame_header,
> > +                   sizeof(AV1RawFrameHeader));
> > +            s->tile_num =
> > +                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
> > +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> > +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> > +
> > +            ret = get_current_frame(avctx);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
> > +                       ret);
> > +                goto end;
> > +            }
> > +            if (avctx->hwaccel) {
> > +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_ERROR,
> > +                           "HW accel decode slice fail.\n");
> > +                    return ret;
> > +                }
> > +            }
> > +            ret = decode_current_frame(avctx);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> > +                       ret);
> > +                goto end;
> > +            }
> > +            if (raw_frame_header->show_frame) {
> > +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +
> > +                *got_frame = 1;
> > +            }
> > +            break;
> > +        case AV1_OBU_TILE_GROUP:
> > +            raw_tile_group = &obu->obu.tile_group;
> > +            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
> > +            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
> > +
> > +            if (avctx->hwaccel) {
> > +                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
> > +                if (ret < 0) {
> > +                    av_log(avctx, AV_LOG_ERROR,
> > +                           "HW accel decode slice fail.\n");
> > +                    return ret;
> > +                }
> > +            }
> > +            if (s->tile_num != raw_tile_group->tg_end + 1)
> > +                break;
> > +            ret = decode_current_frame(avctx);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
> > +                       ret);
> > +                goto end;
> > +            }
> > +
> > +            if (raw_frame_header->show_frame) {
> > +                if (set_output_frame(s->cur_frame.tf.f, frame, pkt) < 0) {
> > +                    av_log(avctx, AV_LOG_DEBUG, "Set output frame error:%d.\n",
> > +                           ret);
> > +                    goto end;
> > +                }
> > +
> > +                *got_frame = 1;
> > +            }
> 
> This section is the same as the FRAME case.  Maybe move it after the switch and
> finish decoding the frame if tile_num shows you are at the end of the frame?
Yes, will consider merging them together. Thanks.

> 
> > +            break;
> > +        case AV1_OBU_TILE_LIST:
> > +        case AV1_OBU_TEMPORAL_DELIMITER:
> > +        case AV1_OBU_PADDING:
> > +        case AV1_OBU_METADATA:
> > +            break;
> > +        default:
> > +            av_log(avctx, AV_LOG_DEBUG,
> > +                   "Un-implemented obu type: %d (%zd bits).\n",
> > +                   unit->type, unit->data_size);
> > +        }
> > +    }
> > +
> > +end:
> > +    ff_cbs_fragment_reset(&s->current_obu);
> > +    return ret;
> > +}
> > +
> > +static void av1_decode_flush(AVCodecContext *avctx) {
> > +    AV1DecContext *s = avctx->priv_data;
> > +    int i;
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++)
> > +        av1_frame_unref(avctx, &s->ref[i]);
> > +
> > +    av1_frame_unref(avctx, &s->cur_frame); }
> > +
> > +AVCodec ff_av1_decoder = {
> > +    .name                  = "av1",
> > +    .long_name             = NULL_IF_CONFIG_SMALL("Hardware Acceleration
> AV1"),
> > +    .type                  = AVMEDIA_TYPE_VIDEO,
> > +    .id                    = AV_CODEC_ID_AV1,
> > +    .priv_data_size        = sizeof(AV1DecContext),
> > +    .init                  = av1_decode_init,
> > +    .close                 = av1_decode_free,
> > +    .decode                = av1_decode_frame,
> > +    .capabilities          = AV_CODEC_CAP_DR1,
> > +    .flush                 = av1_decode_flush,
> > +    .profiles              = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
> > +    .hw_configs            = (const AVCodecHWConfigInternal * []) {
> > +        NULL
> > +    },
> > +};
> > diff --git a/libavcodec/av1dec.h b/libavcodec/av1dec.h new file mode
> > 100644 index 0000000000..5cb0e896b7
> > --- /dev/null
> > +++ b/libavcodec/av1dec.h
> > @@ -0,0 +1,89 @@
> > +/*
> > + * AV1 video decoder
> > + * *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > +02110-1301 USA  */
> > +
> > +#ifndef AVCODEC_AV1DEC_H
> > +#define AVCODEC_AV1DEC_H
> > +
> > +#include <stdatomic.h>
> > +#include "libavutil/buffer.h"
> > +#include "libavutil/md5.h"
> > +#include "avcodec.h"
> > +#include "bswapdsp.h"
> > +#include "get_bits.h"
> > +#include "av1_parse.h"
> > +#include "libavformat/avformat.h"
> > +#include "libavformat/av1.h"
> > +#include "thread.h"
> > +#include "cbs.h"
> > +#include "cbs_av1.h"
> 
> You should include the minimal number of other headers in the header itself,
> include them in the c file if needed.  That said, I think most of these are not used
> anyway.
OK, will modify it.

> 
> > +
> > +typedef enum AV1FrameRestorationType {
> > +    AV1_RESTORE_NONE,
> > +    AV1_RESTORE_WIENER,
> > +    AV1_RESTORE_SGRPROJ,
> > +    AV1_RESTORE_SWITCHABLE,
> > +} AV1FrameRestorationType;
> > +
> > +typedef struct AV1Frame {
> > +    ThreadFrame tf;
> > +
> > +    AVBufferRef *hwaccel_priv_buf;
> > +    void *hwaccel_picture_private;
> > +
> > +    uint8_t loop_filter_delta_enabled;
> > +    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
> > +    int8_t  loop_filter_mode_deltas[2];
> > +    uint8_t gm_type[AV1_TOTAL_REFS_PER_FRAME];
> > +    int32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
> > +} AV1Frame;
> > +
> > +typedef struct TileGroupInfo {
> > +    uint32_t tile_group_offset;
> > +    uint32_t tile_offset;
> > +    uint32_t tile_size;
> > +    uint16_t tile_row;
> > +    uint16_t tile_column;
> > +    uint16_t tile_width_sbs;
> > +    uint16_t tile_height_sbs;
> > +    uint32_t tile_size_bytes;
> > +} TileGroupInfo;
> > +
> > +typedef struct AV1DecContext {
> > +    const AVClass *class;
> > +    AVCodecContext *avctx;
> > +
> > +    enum AVPixelFormat pix_fmt;
> > +    CodedBitstreamContext *cbc;
> > +    CodedBitstreamFragment current_obu;
> > +
> > +    AV1RawSequenceHeader raw_seq;
> > +    AV1RawFrameHeader raw_frame_header;
> > +    AV1RawTileGroup raw_tile_group;
> > +    TileGroupInfo tile_group_info[128];
> 
> Where did 128 come from?  I'm not aware of any restriction here beyond
> MAX_TILE_COLS and MAX_TILE_ROWS, but I admit my knowledge of the
> standard may be lacking.
> 
> Regardless, any limit here will need to also be checked in the code above to
> ensure a malicious stream can't overflow the array.
I will change it to a compound literal, so that this kind of limit and overflow can be avoided.
Thanks

> 
> > +    uint16_t tile_num;
> > +    uint16_t tg_start;
> > +    uint16_t tg_end;
> > +
> > +    AV1Frame ref[8];
> > +    AV1Frame cur_frame;
> > +
> > +} AV1DecContext;
> > +
> > +#endif /* AVCODEC_AV1DEC_H */
> > diff --git a/libavcodec/version.h b/libavcodec/version.h index
> > a3f9f828ee..5bdfdce363 100644
> > --- a/libavcodec/version.h
> > +++ b/libavcodec/version.h
> > @@ -28,7 +28,7 @@
> >   #include "libavutil/version.h"
> >
> >   #define LIBAVCODEC_VERSION_MAJOR  58 -#define
> > LIBAVCODEC_VERSION_MINOR 100
> > +#define LIBAVCODEC_VERSION_MINOR 101
> >   #define LIBAVCODEC_VERSION_MICRO 100
> >
> >   #define LIBAVCODEC_VERSION_INT
> > AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> >
> 
> Thanks,
> 
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org
> with subject "unsubscribe".
James Almer Aug. 24, 2020, 1:10 p.m. UTC | #13
On 8/24/2020 3:57 AM, Wang, Fei W wrote:
> 
> 
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of James
>> Almer
>> Sent: Friday, August 21, 2020 10:43 PM
>> To: ffmpeg-devel@ffmpeg.org
>> Subject: Re: [FFmpeg-devel] [PATCH 1/2] avcodec: add av1 hardware
>> acceleration decoder
>>
>> On 8/21/2020 2:29 AM, Fei Wang wrote:
>>> This av1 decoder is now only used for av1 hardware acceleration
>>> decoder. Consider it can be extend to a local decoder like hevc or vp9
>>> in the future, so define its name as "av1" and put it into external
>>> libraries codec list.
>>>
>>> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
>>> ---
>>>  Changelog              |   1 +
>>>  configure              |   1 +
>>>  libavcodec/Makefile    |   1 +
>>>  libavcodec/allcodecs.c |   1 +
>>>  libavcodec/av1dec.c    | 746
>> +++++++++++++++++++++++++++++++++++++++++
>>>  libavcodec/av1dec.h    |  89 +++++
>>>  libavcodec/version.h   |   2 +-
>>>  7 files changed, 840 insertions(+), 1 deletion(-)  create mode 100644
>>> libavcodec/av1dec.c  create mode 100644 libavcodec/av1dec.h
>>>
>>> diff --git a/Changelog b/Changelog
>>> index 1efc768387..3ff88cc12f 100644
>>> --- a/Changelog
>>> +++ b/Changelog
>>> @@ -14,6 +14,7 @@ version <next>:
>>>  - ADPCM Argonaut Games encoder
>>>  - Argonaut Games ASF muxer
>>>  - AV1 Low overhead bitstream format demuxer
>>> +- AV1 decoder (Hardware acceleration used only)
>>>
>>>
>>>  version 4.3:
>>> diff --git a/configure b/configure
>>> index 6faff9bc7b..ef71e47c4e 100755
>>> --- a/configure
>>> +++ b/configure
>>> @@ -2685,6 +2685,7 @@ atrac3al_decoder_select="mdct"
>>>  atrac3p_decoder_select="mdct sinewin"
>>>  atrac3pal_decoder_select="mdct sinewin"
>>>  atrac9_decoder_select="mdct"
>>> +av1_decoder_select="cbs_av1"
>>>  avrn_decoder_select="exif jpegtables"
>>>  bink_decoder_select="blockdsp hpeldsp"
>>>  binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
>>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile index
>>> 3431ba2dca..2371039bd2 100644
>>> --- a/libavcodec/Makefile
>>> +++ b/libavcodec/Makefile
>>> @@ -764,6 +764,7 @@ OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
>>>  OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
>>>  OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
>>>  OBJS-$(CONFIG_ZMBV_ENCODER)            += zmbvenc.o
>>> +OBJS-$(CONFIG_AV1_DECODER)             += av1dec.o
>>>
>>>  # (AD)PCM decoders/encoders
>>>  OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
>>> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index
>>> 4bd830e5d0..00799d3431 100644
>>> --- a/libavcodec/allcodecs.c
>>> +++ b/libavcodec/allcodecs.c
>>> @@ -761,6 +761,7 @@ extern AVCodec ff_idf_decoder;
>>>   * above is available */
>>>  extern AVCodec ff_aac_mf_encoder;
>>>  extern AVCodec ff_ac3_mf_encoder;
>>> +extern AVCodec ff_av1_decoder;
>>>  extern AVCodec ff_h263_v4l2m2m_encoder;  extern AVCodec
>>> ff_libaom_av1_decoder;  extern AVCodec ff_libopenh264_encoder; diff
>>> --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c new file mode 100644
>>> index 0000000000..ab88ed987a
>>> --- /dev/null
>>> +++ b/libavcodec/av1dec.c
>>> @@ -0,0 +1,746 @@
>>> +/*
>>> + * AV1video decoder
>>> + *
>>> + * This file is part of FFmpeg.
>>> + *
>>> + * FFmpeg is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public
>>> + * License as published by the Free Software Foundation; either
>>> + * version 2.1 of the License, or (at your option) any later version.
>>> + *
>>> + * FFmpeg is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with FFmpeg; if not, write to the Free Software
>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>>> +02110-1301 USA  */
>>> +
>>> +#include "avcodec.h"
>>> +#include "get_bits.h"
>>> +#include "hwconfig.h"
>>> +#include "internal.h"
>>> +#include "profiles.h"
>>> +#include "thread.h"
>>> +#include "videodsp.h"
>>> +#include "libavutil/avassert.h"
>>> +#include "libavutil/pixdesc.h"
>>> +
>>> +#include "av1dec.h"
>>> +#include "libavformat/av1.h"
>>> +
>>> +static void setup_past_independence(AV1Frame *f) {
>>> +    f->loop_filter_delta_enabled = 1;
>>> +
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_INTRA] = 1;
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST] = 0;
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST2] = 0;
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST3] = 0;
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN] = -1;
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF] = 0;
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2] = -1;
>>> +    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF] = -1;
>>> +
>>> +    f->loop_filter_mode_deltas[0] = 0;
>>> +    f->loop_filter_mode_deltas[1] = 0; }
>>> +
>>> +static void load_previous_and_update(AV1DecContext *s) {
>>> +    int i;
>>> +
>>> +    memcpy(s->cur_frame.loop_filter_ref_deltas,
>>> +           s->ref[s-
>>> raw_frame_header.primary_ref_frame].loop_filter_ref_deltas,
>>> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
>>> +    memcpy(s->cur_frame.loop_filter_mode_deltas,
>>> +           s->ref[s-
>>> raw_frame_header.primary_ref_frame].loop_filter_mode_deltas,
>>> +           2 * sizeof(int8_t));
>>> +
>>> +    if (s->raw_frame_header.loop_filter_delta_update) {
>>> +        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
>>> +            if (s->raw_frame_header.update_ref_delta[i])
>>> +                s->cur_frame.loop_filter_ref_deltas[i] =
>>> +                    s->raw_frame_header.loop_filter_ref_deltas[i];
>>> +        }
>>> +
>>> +        for (i = 0; i < 2; i++) {
>>> +            if (s->raw_frame_header.update_mode_delta[i])
>>> +                s->cur_frame.loop_filter_mode_deltas[i] =
>>> +                    s->raw_frame_header.loop_filter_mode_deltas[i];
>>> +        }
>>> +    }
>>> +
>>> +    s->cur_frame.loop_filter_delta_enabled =
>>> +        s->raw_frame_header.loop_filter_delta_enabled;
>>> +}
>>> +
>>> +static uint32_t inverse_recenter(int r, uint32_t v) {
>>> +    if (v > 2 * r)
>>> +        return v;
>>> +    else if (v & 1)
>>> +        return r - ((v + 1) >> 1);
>>> +    else
>>> +        return r + (v >> 1);
>>> +}
>>> +
>>> +static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
>>> +                                                int mx, int r) {
>>> +    if ((r << 1) <= mx) {
>>> +        return inverse_recenter(r, sub_exp);
>>> +    } else {
>>> +        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
>>> +    }
>>> +}
>>> +
>>> +static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
>>> +                                             int high, int r) {
>>> +    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r - low);
>>> +    return x + low;
>>> +}
>>> +
>>> +static void read_global_param(AV1DecContext *s, int type, int ref,
>>> +int idx) {
>>> +    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx, r;
>>> +    abs_bits = AV1_GM_ABS_ALPHA_BITS;
>>> +    prec_bits = AV1_GM_ALPHA_PREC_BITS;
>>> +
>>> +    if (idx < 2) {
>>> +        if (type == AV1_WARP_MODEL_TRANSLATION) {
>>> +            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
>>> +                !s->raw_frame_header.allow_high_precision_mv;
>>> +            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
>>> +                !s->raw_frame_header.allow_high_precision_mv;
>>> +        } else {
>>> +            abs_bits = AV1_GM_ABS_TRANS_BITS;
>>> +            prec_bits = AV1_GM_TRANS_PREC_BITS;
>>> +        }
>>> +    }
>>> +    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
>>> +    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
>>> +    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
>>> +    mx = 1 << abs_bits;
>>> +    r = (s->ref[s->raw_frame_header.primary_ref_frame].gm_params[ref][idx]
>>> +         >> prec_diff) - sub;
>>> +
>>> +    s->cur_frame.gm_params[ref][idx] =
>>> +        (decode_signed_subexp_with_ref(s-
>>> raw_frame_header.gm_params[ref][idx],
>>> +                                       -mx, mx + 1, r) << prec_diff)
>>> ++ round; }
>>> +
>>> +/**
>>> +* update gm type/params, since cbs already implemented part of this
>>> +funcation,
>>> +* so we don't need to full implement spec.
>>
>> FYI, the gm params code in cbs_av1 is currently buggy for some streams, and it
>> needs fixing.
> Thanks for your information. I will keep it in my mind. For clips(more than 200+) 
> that I tested, it looks good and I didn't meet any bug yet from cbs_av1. 
> 
> Thanks
> Fei

The known issues were fixed yesterday by Mark, so it should be good now.
James Almer Aug. 24, 2020, 1:12 p.m. UTC | #14
On 8/24/2020 5:39 AM, Wang, Fei W wrote:
>>> +static int get_tiles_info(AVCodecContext *avctx,
>>> +                          AV1RawTileGroup *tile_group,
>>> +                          uint32_t tile_group_offset) {
>>> +    AV1DecContext *s = avctx->priv_data;
>>> +    GetBitContext gb;
>>> +    uint16_t tile_num, tile_row, tile_col;
>>> +    uint16_t mi_cols, mi_rows, sb_cols, sb_rows, tile_width_sb, tile_height_sb;
>>> +    uint32_t size = 0, size_bytes = 0, offset = 0;
>>> +    int ret = 0;
>>> +
>>> +    if ((ret = init_get_bits8(&gb,
>>> +                              tile_group->tile_data.data,
>>> +                              tile_group->tile_data.data_size)) < 0) {
>>> +        av_log(avctx, AV_LOG_ERROR, "Fail to initialize bitstream reader.\n");
>>> +        return ret;
>>> +    }
>>> +
>>> +    s->tg_start = tile_group->tg_start;
>>> +    s->tg_end = tile_group->tg_end;
>>> +
>>> +    if (s->raw_frame_header.uniform_tile_spacing_flag) {
>>> +        mi_cols = 2 * ((s->raw_seq.max_frame_width_minus_1 + 8) >> 3);
>>> +        mi_rows = 2 * ((s->raw_seq.max_frame_height_minus_1 + 8) >> 3);
>>> +        sb_cols = s->raw_seq.use_128x128_superblock ? ((mi_cols + 31) >> 5)
>>> +                                                    : ((mi_cols + 15) >> 4);
>>> +        sb_rows = s->raw_seq.use_128x128_superblock ? ((mi_rows + 31) >> 5)
>>> +                                                    : ((mi_rows + 15) >> 4);
>>> +        tile_width_sb = (sb_cols + (1 << s->raw_frame_header.tile_cols_log2)
>>> +                         -1) >> s->raw_frame_header.tile_cols_log2;
>>> +        tile_height_sb = (sb_rows + (1 << s->raw_frame_header.tile_rows_log2)
>>> +                         -1) >> s->raw_frame_header.tile_rows_log2;
>> Maybe cbs read should always fill the width/height_in_sbs arrays so that all the
>> special casing for uniform_spacing_flag in this function isn't needed?  It would
>> be trivial to add it there.
> @James Almer, how do you think of this? Currently, cbs_av1 read 
> width/height_in_sbs_minus_1 directly from bitstream. But if uniform_spacing_flag is 1,
> the 2 values may need some calculation like here does.

Mark suggested a solution in the form of a patch as a reply to his own
email in this same thread.
Fei Wang Aug. 25, 2020, 2:42 a.m. UTC | #15
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of James
> Almer
> Sent: Saturday, August 22, 2020 1:22 AM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH 1/2] avcodec: add av1 hardware
> acceleration decoder
> 
> On 8/21/2020 2:05 PM, Derek Buitenhuis wrote:
> > On 21/08/2020 16:00, James Almer wrote:
> >> If you actually look at the patch, you'll notice that this is
> >> basically the template of what eventually will become the native
> >> decoder, that for now only includes the high level bitstream parsing
> >> and the hwaccel hooks (And thus will only output frames if one is
> >> plugged in). So the names are fine.
> >
> > Isn't there overlap with the parser here (global motion params)?
> 
> cbs_av1 parses global motion params, yes. I'm not sure why this decoder
> attempts to do it too, but it acknowledges the fact it's also done by CBS. Maybe
> Fei Wang can explain why the duplication.
It should not be a duplication. According to the spec, the gm_params process is:
decode_signed_subexp_with_ref() {
    decode_unsigned_subexp_with_ref() {
        decode_subexp
        inverse_recenter
    }
}
And cbs_av1 only implements the decode_subexp part. All the other parts
need the reference frame's gm_params to calculate the current frame's
gm_params. So this is the implementation of that calculation, using the
decode_subexp result from cbs_av1.

> 
> The cbs_av1 gm params parsing code should be fixed, in any case.
> 
> >
> > - Derek
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org
> with subject "unsubscribe".
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index 1efc768387..3ff88cc12f 100644
--- a/Changelog
+++ b/Changelog
@@ -14,6 +14,7 @@  version <next>:
 - ADPCM Argonaut Games encoder
 - Argonaut Games ASF muxer
 - AV1 Low overhead bitstream format demuxer
+- AV1 decoder (Hardware acceleration used only)
 
 
 version 4.3:
diff --git a/configure b/configure
index 6faff9bc7b..ef71e47c4e 100755
--- a/configure
+++ b/configure
@@ -2685,6 +2685,7 @@  atrac3al_decoder_select="mdct"
 atrac3p_decoder_select="mdct sinewin"
 atrac3pal_decoder_select="mdct sinewin"
 atrac9_decoder_select="mdct"
+av1_decoder_select="cbs_av1"
 avrn_decoder_select="exif jpegtables"
 bink_decoder_select="blockdsp hpeldsp"
 binkaudio_dct_decoder_select="mdct rdft dct sinewin wma_freqs"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3431ba2dca..2371039bd2 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -764,6 +764,7 @@  OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
 OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
 OBJS-$(CONFIG_ZMBV_DECODER)            += zmbv.o
 OBJS-$(CONFIG_ZMBV_ENCODER)            += zmbvenc.o
+OBJS-$(CONFIG_AV1_DECODER)             += av1dec.o
 
 # (AD)PCM decoders/encoders
 OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 4bd830e5d0..00799d3431 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -761,6 +761,7 @@  extern AVCodec ff_idf_decoder;
  * above is available */
 extern AVCodec ff_aac_mf_encoder;
 extern AVCodec ff_ac3_mf_encoder;
+extern AVCodec ff_av1_decoder;
 extern AVCodec ff_h263_v4l2m2m_encoder;
 extern AVCodec ff_libaom_av1_decoder;
 extern AVCodec ff_libopenh264_encoder;
diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
new file mode 100644
index 0000000000..ab88ed987a
--- /dev/null
+++ b/libavcodec/av1dec.c
@@ -0,0 +1,746 @@ 
+/*
+ * AV1video decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "hwconfig.h"
+#include "internal.h"
+#include "profiles.h"
+#include "thread.h"
+#include "videodsp.h"
+#include "libavutil/avassert.h"
+#include "libavutil/pixdesc.h"
+
+#include "av1dec.h"
+#include "libavformat/av1.h"
+
+static void setup_past_independence(AV1Frame *f)
+{
+    f->loop_filter_delta_enabled = 1;
+
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_INTRA] = 1;
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST] = 0;
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST2] = 0;
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_LAST3] = 0;
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_GOLDEN] = -1;
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_BWDREF] = 0;
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF2] = -1;
+    f->loop_filter_ref_deltas[AV1_REF_FRAME_ALTREF] = -1;
+
+    f->loop_filter_mode_deltas[0] = 0;
+    f->loop_filter_mode_deltas[1] = 0;
+}
+
+static void load_previous_and_update(AV1DecContext *s)
+{
+    int i;
+
+    memcpy(s->cur_frame.loop_filter_ref_deltas,
+           s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_ref_deltas,
+           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
+    memcpy(s->cur_frame.loop_filter_mode_deltas,
+           s->ref[s->raw_frame_header.primary_ref_frame].loop_filter_mode_deltas,
+           2 * sizeof(int8_t));
+
+    if (s->raw_frame_header.loop_filter_delta_update) {
+        for (i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++) {
+            if (s->raw_frame_header.update_ref_delta[i])
+                s->cur_frame.loop_filter_ref_deltas[i] =
+                    s->raw_frame_header.loop_filter_ref_deltas[i];
+        }
+
+        for (i = 0; i < 2; i++) {
+            if (s->raw_frame_header.update_mode_delta[i])
+                s->cur_frame.loop_filter_mode_deltas[i] =
+                    s->raw_frame_header.loop_filter_mode_deltas[i];
+        }
+    }
+
+    s->cur_frame.loop_filter_delta_enabled =
+        s->raw_frame_header.loop_filter_delta_enabled;
+}
+
+static uint32_t inverse_recenter(int r, uint32_t v)
+{
+    if (v > 2 * r)
+        return v;
+    else if (v & 1)
+        return r - ((v + 1) >> 1);
+    else
+        return r + (v >> 1);
+}
+
+static uint32_t decode_unsigned_subexp_with_ref(uint32_t sub_exp,
+                                                int mx, int r)
+{
+    if ((r << 1) <= mx) {
+        return inverse_recenter(r, sub_exp);
+    } else {
+        return mx - 1 - inverse_recenter(mx - 1 - r, sub_exp);
+    }
+}
+
+static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
+                                             int high, int r)
+{
+    int32_t x = decode_unsigned_subexp_with_ref(sub_exp, high - low, r - low);
+    return x + low;
+}
+
+static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
+{
+    uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx, r;
+    abs_bits = AV1_GM_ABS_ALPHA_BITS;
+    prec_bits = AV1_GM_ALPHA_PREC_BITS;
+
+    if (idx < 2) {
+        if (type == AV1_WARP_MODEL_TRANSLATION) {
+            abs_bits = AV1_GM_ABS_TRANS_ONLY_BITS -
+                !s->raw_frame_header.allow_high_precision_mv;
+            prec_bits = AV1_GM_TRANS_ONLY_PREC_BITS -
+                !s->raw_frame_header.allow_high_precision_mv;
+        } else {
+            abs_bits = AV1_GM_ABS_TRANS_BITS;
+            prec_bits = AV1_GM_TRANS_PREC_BITS;
+        }
+    }
+    round = (idx % 3) == 2 ? (1 << AV1_WARPEDMODEL_PREC_BITS) : 0;
+    prec_diff = AV1_WARPEDMODEL_PREC_BITS - prec_bits;
+    sub = (idx % 3) == 2 ? (1 << prec_bits) : 0;
+    mx = 1 << abs_bits;
+    r = (s->ref[s->raw_frame_header.primary_ref_frame].gm_params[ref][idx]
+         >> prec_diff) - sub;
+
+    s->cur_frame.gm_params[ref][idx] =
+        (decode_signed_subexp_with_ref(s->raw_frame_header.gm_params[ref][idx],
+                                       -mx, mx + 1, r) << prec_diff) + round;
+}
+
+/**
+* update gm type/params, since cbs already implemented part of this funcation,
+* so we don't need to full implement spec.
+*/
+static void global_motion_params(AV1DecContext *s)
+{
+    const AV1RawFrameHeader *header = &s->raw_frame_header;
+    int type;
+    int i, j;
+    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
+        s->cur_frame.gm_type[i] = AV1_WARP_MODEL_IDENTITY;
+        for (j = 0; j <= 5; j++)
+            s->cur_frame.gm_params[i][j] = (j % 3 == 2) ?
+                                           1 << AV1_WARPEDMODEL_PREC_BITS : 0;
+    }
+    if (header->frame_type == AV1_FRAME_KEY ||
+        header->frame_type == AV1_FRAME_INTRA_ONLY)
+        return;
+
+    for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
+        if (header->is_global[i]) {
+            if (header->is_rot_zoom[i]) {
+                type = AV1_WARP_MODEL_ROTZOOM;
+            } else {
+                type = header->is_translation[i] ? AV1_WARP_MODEL_TRANSLATION
+                                                 : AV1_WARP_MODEL_AFFINE;
+            }
+        } else {
+            type = AV1_WARP_MODEL_IDENTITY;
+        }
+        s->cur_frame.gm_type[i] = type;
+
+        if (type >= AV1_WARP_MODEL_ROTZOOM) {
+            read_global_param(s, type, i, 2);
+            read_global_param(s, type, i, 3);
+            if (type == AV1_WARP_MODEL_AFFINE) {
+                read_global_param(s, type, i, 4);
+                read_global_param(s, type, i, 5);
+            } else {
+                s->cur_frame.gm_params[i][4] = - s->cur_frame.gm_params[i][3];
+                s->cur_frame.gm_params[i][5] = - s->cur_frame.gm_params[i][2];
+            }
+        }
+        if (type >= AV1_WARP_MODEL_TRANSLATION) {
+            read_global_param(s, type, i, 0);
+            read_global_param(s, type, i, 1);
+        }
+    }
+}
+
+static int get_tiles_info(AVCodecContext *avctx,
+                          AV1RawTileGroup *tile_group,
+                          uint32_t tile_group_offset)
+{
+    AV1DecContext *s = avctx->priv_data;
+    GetBitContext gb;
+    uint16_t tile_num, tile_row, tile_col;
+    uint16_t mi_cols, mi_rows, sb_cols, sb_rows, tile_width_sb, tile_height_sb;
+    uint32_t size = 0, size_bytes = 0, offset = 0;
+    int ret = 0;
+
+    if ((ret = init_get_bits8(&gb,
+                              tile_group->tile_data.data,
+                              tile_group->tile_data.data_size)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Fail to initialize bitstream reader.\n");
+        return ret;
+    }
+
+    s->tg_start = tile_group->tg_start;
+    s->tg_end = tile_group->tg_end;
+
+    if (s->raw_frame_header.uniform_tile_spacing_flag) {
+        mi_cols = 2 * ((s->raw_seq.max_frame_width_minus_1 + 8) >> 3);
+        mi_rows = 2 * ((s->raw_seq.max_frame_height_minus_1 + 8) >> 3);
+        sb_cols = s->raw_seq.use_128x128_superblock ? ((mi_cols + 31) >> 5)
+                                                    : ((mi_cols + 15) >> 4);
+        sb_rows = s->raw_seq.use_128x128_superblock ? ((mi_rows + 31) >> 5)
+                                                    : ((mi_rows + 15) >> 4);
+        tile_width_sb = (sb_cols + (1 << s->raw_frame_header.tile_cols_log2)
+                         -1) >> s->raw_frame_header.tile_cols_log2;
+        tile_height_sb = (sb_rows + (1 << s->raw_frame_header.tile_rows_log2)
+                         -1) >> s->raw_frame_header.tile_rows_log2;
+    }
+
+    for (tile_num = tile_group->tg_start; tile_num <= tile_group->tg_end; tile_num++) {
+        tile_row = tile_num / s->raw_frame_header.tile_cols;
+        tile_col = tile_num % s->raw_frame_header.tile_cols;
+
+        if (tile_num == tile_group->tg_end) {
+            s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
+            s->tile_group_info[tile_num].tile_size = get_bits_left(&gb) / 8;
+            s->tile_group_info[tile_num].tile_offset = offset;
+            s->tile_group_info[tile_num].tile_row = tile_row;
+            s->tile_group_info[tile_num].tile_column = tile_col;
+            s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
+            if (s->raw_frame_header.uniform_tile_spacing_flag) {
+                s->tile_group_info[tile_num].tile_width_sbs =
+                    (tile_col + 1) == s->raw_frame_header.tile_cols ?
+                    sb_cols - tile_col * tile_width_sb :
+                    tile_width_sb;
+                s->tile_group_info[tile_num].tile_height_sbs =
+                    (tile_row + 1) == s->raw_frame_header.tile_rows ?
+                    sb_rows - tile_row * tile_height_sb :
+                    tile_height_sb;
+            } else {
+                s->tile_group_info[tile_num].tile_width_sbs =
+                    s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
+                s->tile_group_info[tile_num].tile_height_sbs =
+                    s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
+            }
+            return 0;
+        }
+        size_bytes = s->raw_frame_header.tile_size_bytes_minus1 + 1;
+        size = get_bits_le(&gb, size_bytes * 8) + 1;
+        skip_bits(&gb, size * 8);
+
+        offset += size_bytes;
+
+        s->tile_group_info[tile_num].tile_group_offset = tile_group_offset;
+        s->tile_group_info[tile_num].tile_size = size;
+        s->tile_group_info[tile_num].tile_offset = offset;
+        s->tile_group_info[tile_num].tile_row = tile_row;
+        s->tile_group_info[tile_num].tile_column = tile_col;
+        s->tile_group_info[tile_num].tile_size_bytes = size_bytes;
+        if (s->raw_frame_header.uniform_tile_spacing_flag) {
+            s->tile_group_info[tile_num].tile_width_sbs =
+                (tile_col + 1) == s->raw_frame_header.tile_cols ?
+                sb_cols - tile_col * tile_width_sb :
+                tile_width_sb;
+            s->tile_group_info[tile_num].tile_height_sbs =
+                (tile_row + 1) == s->raw_frame_header.tile_rows ?
+                sb_rows - tile_row * tile_height_sb :
+                tile_height_sb;
+        } else {
+            s->tile_group_info[tile_num].tile_width_sbs =
+                s->raw_frame_header.width_in_sbs_minus_1[tile_col] + 1;
+            s->tile_group_info[tile_num].tile_height_sbs =
+                s->raw_frame_header.height_in_sbs_minus_1[tile_row] + 1;
+        }
+        offset += size;
+    }
+
+    return 0;
+
+}
+
+/**
+ * Derive the software pixel format from the sequence header's profile and
+ * color_config, then negotiate the final format via ff_thread_get_format().
+ *
+ * Returns 0 on success, a negative AVERROR on unsupported configurations.
+ */
+static int get_pixel_format(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    const AV1RawSequenceHeader *seq = &s->raw_seq;
+    uint8_t bit_depth;
+    int ret;
+    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
+#define HWACCEL_MAX (0)
+    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
+
+    /* Only profile 2 with high_bitdepth can signal 12 bit; otherwise
+     * high_bitdepth selects 10 vs 8 bit. */
+    if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
+        bit_depth = seq->color_config.twelve_bit ? 12 : 10;
+    else if (seq->seq_profile <= 2)
+        bit_depth = seq->color_config.high_bitdepth ? 10 : 8;
+    else {
+        av_log(avctx, AV_LOG_ERROR,
+               "Unknown AV1 profile %d.\n", seq->seq_profile);
+        return AVERROR(EINVAL);
+    }
+
+    if (!seq->color_config.mono_chrome) {
+        // 4:4:4 x:0 y:0, 4:2:2 x:1 y:0, 4:2:0 x:1 y:1
+        if (seq->color_config.subsampling_x == 0 &&
+            seq->color_config.subsampling_y == 0) {
+            if (bit_depth == 8)
+                pix_fmt = AV_PIX_FMT_YUV444P;
+            else if (bit_depth == 10)
+                pix_fmt = AV_PIX_FMT_YUV444P10;
+            else if (bit_depth == 12)
+                pix_fmt = AV_PIX_FMT_YUV444P12;
+            else
+                av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        } else if (seq->color_config.subsampling_x == 1 &&
+                   seq->color_config.subsampling_y == 0) {
+            if (bit_depth == 8)
+                pix_fmt = AV_PIX_FMT_YUV422P;
+            else if (bit_depth == 10)
+                pix_fmt = AV_PIX_FMT_YUV422P10;
+            else if (bit_depth == 12)
+                pix_fmt = AV_PIX_FMT_YUV422P12;
+            else
+                av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        } else if (seq->color_config.subsampling_x == 1 &&
+                   seq->color_config.subsampling_y == 1) {
+            if (bit_depth == 8)
+                pix_fmt = AV_PIX_FMT_YUV420P;
+            else if (bit_depth == 10)
+                pix_fmt = AV_PIX_FMT_YUV420P10;
+            else if (bit_depth == 12)
+                pix_fmt = AV_PIX_FMT_YUV420P12;
+            else
+                av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        }
+    } else {
+        /* mono_chrome streams carry luma only (the spec fixes subsampling
+         * to 1:1), so map to a grayscale format, not YUV440P. */
+        if (seq->color_config.subsampling_x == 1 &&
+            seq->color_config.subsampling_y == 1) {
+            if (bit_depth == 8)
+                pix_fmt = AV_PIX_FMT_GRAY8;
+            else if (bit_depth == 10)
+                pix_fmt = AV_PIX_FMT_GRAY10;
+            else if (bit_depth == 12)
+                pix_fmt = AV_PIX_FMT_GRAY12;
+            else
+                av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+        }
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "AV1 decode get format: %s.\n",
+           av_get_pix_fmt_name(pix_fmt));
+
+    if (pix_fmt == AV_PIX_FMT_NONE)
+        return AVERROR(EINVAL);
+    s->pix_fmt = pix_fmt;
+
+    *fmtp++ = s->pix_fmt;
+    *fmtp = AV_PIX_FMT_NONE;
+    ret = ff_thread_get_format(avctx, pix_fmts);
+    if (ret < 0)
+        return ret;
+
+    avctx->pix_fmt = ret;
+
+    return 0;
+}
+
+/*
+ * Release all per-frame resources: the (possibly hwaccel-backed) thread
+ * frame buffer and the hwaccel private data attached to it.
+ * The hwaccel private pointer is cleared so the slot can be reused.
+ */
+static void av1_frame_unref(AVCodecContext *avctx, AV1Frame *f)
+{
+    ff_thread_release_buffer(avctx, &f->tf);
+    av_buffer_unref(&f->hwaccel_priv_buf);
+    f->hwaccel_picture_private = NULL;
+}
+
+/**
+ * Make dst a new reference to src: share the frame buffer and hwaccel
+ * private data, and copy the per-frame state (loop filter deltas, global
+ * motion) that later frames may inherit from dst.
+ *
+ * Returns 0 on success, a negative AVERROR on failure (dst is unreferenced).
+ */
+static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, AV1Frame *src)
+{
+    int ret;
+
+    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
+    if (ret < 0)
+        return ret;
+
+    if (src->hwaccel_picture_private) {
+        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
+        if (!dst->hwaccel_priv_buf)
+            goto fail;
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
+
+    dst->loop_filter_delta_enabled = src->loop_filter_delta_enabled;
+    memcpy(dst->loop_filter_ref_deltas, src->loop_filter_ref_deltas,
+           sizeof(dst->loop_filter_ref_deltas));
+    memcpy(dst->loop_filter_mode_deltas, src->loop_filter_mode_deltas,
+           sizeof(dst->loop_filter_mode_deltas));
+    memcpy(dst->gm_type, src->gm_type, sizeof(dst->gm_type));
+    /* gm_params is int32_t[AV1_TOTAL_REFS_PER_FRAME][6]; the previous size
+     * expression used sizeof(int8_t) and copied only a quarter of it. */
+    memcpy(dst->gm_params, src->gm_params, sizeof(dst->gm_params));
+
+    return 0;
+
+fail:
+    av1_frame_unref(avctx, dst);
+    return AVERROR(ENOMEM);
+}
+
+/**
+ * Codec close callback: drop every reference slot, the in-flight frame and
+ * the CBS parsing state.
+ *
+ * Must tolerate being called from a partially failed init, where some
+ * tf.f pointers were never allocated; the original code dereferenced
+ * tf.f->buf[0] unconditionally and crashed in that case.
+ */
+static av_cold int av1_decode_free(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
+        if (s->ref[i].tf.f) {
+            if (s->ref[i].tf.f->buf[0])
+                av1_frame_unref(avctx, &s->ref[i]);
+            av_frame_free(&s->ref[i].tf.f);
+        }
+    }
+    if (s->cur_frame.tf.f) {
+        if (s->cur_frame.tf.f->buf[0])
+            av1_frame_unref(avctx, &s->cur_frame);
+        av_frame_free(&s->cur_frame.tf.f);
+    }
+
+    ff_cbs_fragment_free(&s->current_obu);
+    ff_cbs_close(&s->cbc);
+
+    return 0;
+}
+
+/**
+ * Codec init callback: allocate the reference and current frame holders and
+ * set up the CBS context for OBU parsing.
+ *
+ * On any failure everything allocated so far is freed here, because this
+ * codec does not set FF_CODEC_CAP_INIT_CLEANUP and close() would otherwise
+ * never run (the original leaked the frames when ff_cbs_init failed).
+ */
+static av_cold int av1_decode_init(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    int i;
+    int ret;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
+        s->ref[i].tf.f = av_frame_alloc();
+        if (!s->ref[i].tf.f) {
+            av1_decode_free(avctx);
+            av_log(avctx, AV_LOG_ERROR,
+                   "Failed to allocate reference frame buffer %d.\n", i);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    s->cur_frame.tf.f = av_frame_alloc();
+    if (!s->cur_frame.tf.f) {
+        av1_decode_free(avctx);
+        av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    s->avctx = avctx;
+    s->pix_fmt = AV_PIX_FMT_NONE;
+
+    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
+    if (ret < 0)
+        av1_decode_free(avctx);
+
+    return ret;
+}
+
+/*
+ * Acquire a frame buffer for f and, when a hwaccel is active and requires
+ * it, the zero-initialized per-frame private data block.
+ * Returns 0 on success, a negative AVERROR otherwise.
+ */
+static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
+{
+    const AVHWAccel *hwaccel = avctx->hwaccel;
+    int ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
+
+    if (ret < 0)
+        return ret;
+
+    if (hwaccel && hwaccel->frame_priv_data_size) {
+        f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+        if (!f->hwaccel_priv_buf) {
+            av1_frame_unref(avctx, f);
+            return AVERROR(ENOMEM);
+        }
+        f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
+    }
+
+    return 0;
+}
+
+/*
+ * Export srcframe through the caller-provided output frame, carrying over
+ * the packet's timestamps.
+ */
+static int set_output_frame(AVFrame *srcframe, AVFrame *frame, AVPacket *pkt)
+{
+    int ret = av_frame_ref(frame, srcframe);
+
+    if (ret < 0)
+        return ret;
+
+    frame->pts     = pkt->pts;
+    frame->pkt_dts = pkt->dts;
+
+    return 0;
+}
+
+/**
+ * After decoding, overwrite every reference slot selected by
+ * refresh_frame_flags (all slots on a key frame) with the current frame.
+ *
+ * Returns 0 on success, a negative AVERROR if referencing fails.
+ */
+static int update_reference_list(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    const AV1RawFrameHeader *header = &s->raw_frame_header;
+    int i, ret;
+
+    /* iterate the whole DPB instead of a magic "8" */
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
+        if (header->frame_type == AV1_FRAME_KEY ||
+            (header->refresh_frame_flags & (1 << i))) {
+            if (s->ref[i].tf.f->buf[0])
+                av1_frame_unref(avctx, &s->ref[i]);
+            ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame);
+            if (ret < 0) {
+                /* a failed ref is an error, not a debug event */
+                av_log(avctx, AV_LOG_ERROR, "Ref frame error:%d.\n", ret);
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+/*
+ * Prepare the frame about to be decoded: release any previous buffer,
+ * allocate a fresh one, then initialize the per-frame state either from
+ * scratch (no primary reference) or from the primary reference frame, and
+ * finally derive the global motion parameters.
+ */
+static int get_current_frame(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    int ret;
+
+    if (s->cur_frame.tf.f->buf[0])
+        av1_frame_unref(avctx, &s->cur_frame);
+
+    if ((ret = av1_frame_alloc(avctx, &s->cur_frame)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Get current frame fail.\n");
+        return ret;
+    }
+
+    if (s->raw_frame_header.primary_ref_frame == AV1_PRIMARY_REF_NONE)
+        setup_past_independence(&s->cur_frame);
+    else
+        load_previous_and_update(s);
+
+    global_motion_params(s);
+
+    return 0;
+}
+
+/**
+ * Submit the prepared frame to the hwaccel (start_frame + end_frame) and
+ * update the reference list. This decoder has no software fallback, so the
+ * absence of a hwaccel is a hard error.
+ *
+ * Returns 0 on success, a negative AVERROR otherwise.
+ */
+static int decode_current_frame(AVCodecContext *avctx)
+{
+    int ret;
+
+    if (!avctx->hwaccel) {
+        /* fixed typos: "suppport", "strat"; use a real error code, not -1 */
+        av_log(avctx, AV_LOG_ERROR, "Your platform doesn't support hardware "
+               "accelerated AV1 decode.\n");
+        return AVERROR(ENOSYS);
+    }
+
+    ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "HW accel start frame fail.\n");
+        return ret;
+    }
+
+    ret = avctx->hwaccel->end_frame(avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
+        return ret;
+    }
+
+    ret = update_reference_list(avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Fail to update reference list.\n");
+        return ret;
+    }
+    return 0;
+}
+
+/**
+ * Main decode callback: split the packet into OBUs via CBS and dispatch on
+ * OBU type. Sequence headers drive format negotiation; frame headers /
+ * frame OBUs allocate and decode the current frame; tile groups feed the
+ * hwaccel and trigger decoding once the last tile arrived.
+ *
+ * Fixes vs the original:
+ *  - set_output_frame() results are captured in ret (a stale ret was
+ *    logged and 0 could be returned on failure);
+ *  - hwaccel decode_slice errors go through end: so the CBS fragment is
+ *    always reset (the early returns leaked it);
+ *  - a tile group without a preceding frame header no longer dereferences
+ *    a NULL raw_frame_header;
+ *  - %zu (data_size is size_t) and "bytes" in the default-case log.
+ */
+static int av1_decode_frame(AVCodecContext *avctx, void *frame,
+                            int *got_frame, AVPacket *pkt)
+{
+    AV1DecContext *s = avctx->priv_data;
+    AV1RawSequenceHeader *raw_seq = NULL;
+    AV1RawFrameHeader *raw_frame_header = NULL;
+    AV1RawTileGroup *raw_tile_group = NULL;
+    uint32_t tile_group_offset = 0;
+    int i;
+    int ret = 0;
+
+    ret = ff_cbs_read_packet(s->cbc, &s->current_obu, pkt);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Fail to read packet.\n");
+        goto end;
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Total obu for this frame:%d.\n",
+           s->current_obu.nb_units);
+
+    for (i = 0; i < s->current_obu.nb_units; i++) {
+        CodedBitstreamUnit *unit = &s->current_obu.units[i];
+        AV1RawOBU *obu = unit->content;
+        av_log(avctx, AV_LOG_DEBUG, "Obu idx:%d, obu type:%d.\n", i, unit->type);
+
+        switch (unit->type) {
+        case AV1_OBU_SEQUENCE_HEADER:
+            raw_seq = &obu->obu.sequence_header;
+            memcpy(&s->raw_seq, raw_seq, sizeof(AV1RawSequenceHeader));
+            if (s->pix_fmt == AV_PIX_FMT_NONE) {
+                ret = get_pixel_format(avctx);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "Fail to get format from sequence header.\n");
+                    goto end;
+                }
+            }
+            break;
+        case AV1_OBU_FRAME_HEADER:
+        case AV1_OBU_REDUNDANT_FRAME_HEADER:
+            raw_frame_header = &obu->obu.frame_header;
+            memcpy(&s->raw_frame_header,
+                   raw_frame_header,
+                   sizeof(AV1RawFrameHeader));
+            s->tile_num =
+                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
+            if (raw_frame_header->show_existing_frame) {
+                /* Output an already decoded reference directly; no new
+                 * frame is decoded for this header. */
+                ret = set_output_frame(s->ref[raw_frame_header->frame_to_show_map_idx].tf.f,
+                                       frame, pkt);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Set output frame error:%d.\n",
+                           ret);
+                    goto end;
+                }
+                *got_frame = 1;
+                goto end;
+            } else {
+                ret = get_current_frame(avctx);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
+                           ret);
+                    goto end;
+                }
+            }
+            break;
+        case AV1_OBU_FRAME:
+            /* A frame OBU bundles the header and a single tile group. */
+            raw_frame_header = &obu->obu.frame.header;
+            raw_tile_group = &obu->obu.frame.tile_group;
+            memcpy(&s->raw_frame_header,
+                   raw_frame_header,
+                   sizeof(AV1RawFrameHeader));
+            s->tile_num =
+                raw_frame_header->tile_cols * raw_frame_header->tile_rows;
+            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
+            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
+
+            ret = get_current_frame(avctx);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_DEBUG, "Get current frame error:%d.\n",
+                       ret);
+                goto end;
+            }
+            if (avctx->hwaccel) {
+                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "HW accel decode slice fail.\n");
+                    goto end;
+                }
+            }
+            ret = decode_current_frame(avctx);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
+                       ret);
+                goto end;
+            }
+            if (raw_frame_header->show_frame) {
+                ret = set_output_frame(s->cur_frame.tf.f, frame, pkt);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Set output frame error:%d.\n",
+                           ret);
+                    goto end;
+                }
+
+                *got_frame = 1;
+            }
+            break;
+        case AV1_OBU_TILE_GROUP:
+            if (!raw_frame_header) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Tile group OBU without preceding frame header.\n");
+                ret = AVERROR_INVALIDDATA;
+                goto end;
+            }
+            raw_tile_group = &obu->obu.tile_group;
+            tile_group_offset = raw_tile_group->tile_data.data - pkt->data;
+            get_tiles_info(avctx, raw_tile_group, tile_group_offset);
+
+            if (avctx->hwaccel) {
+                ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "HW accel decode slice fail.\n");
+                    goto end;
+                }
+            }
+            /* Only decode once the final tile of the frame has arrived. */
+            if (s->tile_num != raw_tile_group->tg_end + 1)
+                break;
+            ret = decode_current_frame(avctx);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_DEBUG, "Decode current frame error:%d.\n",
+                       ret);
+                goto end;
+            }
+
+            if (raw_frame_header->show_frame) {
+                ret = set_output_frame(s->cur_frame.tf.f, frame, pkt);
+                if (ret < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Set output frame error:%d.\n",
+                           ret);
+                    goto end;
+                }
+
+                *got_frame = 1;
+            }
+            break;
+        case AV1_OBU_TILE_LIST:
+        case AV1_OBU_TEMPORAL_DELIMITER:
+        case AV1_OBU_PADDING:
+        case AV1_OBU_METADATA:
+            break;
+        default:
+            av_log(avctx, AV_LOG_DEBUG,
+                   "Un-implemented obu type: %d (%zu bytes).\n",
+                   unit->type, unit->data_size);
+        }
+    }
+
+end:
+    ff_cbs_fragment_reset(&s->current_obu);
+    return ret;
+}
+
+/* Flush callback: drop the in-flight frame and every reference slot. */
+static void av1_decode_flush(AVCodecContext *avctx)
+{
+    AV1DecContext *s = avctx->priv_data;
+    int i;
+
+    av1_frame_unref(avctx, &s->cur_frame);
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ref); i++)
+        av1_frame_unref(avctx, &s->ref[i]);
+}
+
+/* NOTE(review): despite the generic "av1" name, this decoder only works
+ * with a hwaccel attached (decode_current_frame errors out otherwise), and
+ * hw_configs is still empty here -- presumably populated by follow-up
+ * hwaccel patches; confirm before merging. */
+AVCodec ff_av1_decoder = {
+    .name                  = "av1",
+    .long_name             = NULL_IF_CONFIG_SMALL("Hardware Acceleration AV1"),
+    .type                  = AVMEDIA_TYPE_VIDEO,
+    .id                    = AV_CODEC_ID_AV1,
+    .priv_data_size        = sizeof(AV1DecContext),
+    .init                  = av1_decode_init,
+    .close                 = av1_decode_free,
+    .decode                = av1_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .flush                 = av1_decode_flush,
+    .profiles              = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
+    .hw_configs            = (const AVCodecHWConfigInternal * []) {
+        NULL
+    },
+};
diff --git a/libavcodec/av1dec.h b/libavcodec/av1dec.h
new file mode 100644
index 0000000000..5cb0e896b7
--- /dev/null
+++ b/libavcodec/av1dec.h
@@ -0,0 +1,89 @@ 
+/*
+ * AV1 video decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AV1DEC_H
+#define AVCODEC_AV1DEC_H
+
+#include <stdatomic.h>
+#include "libavutil/buffer.h"
+#include "libavutil/md5.h"
+#include "avcodec.h"
+#include "bswapdsp.h"
+#include "get_bits.h"
+#include "av1_parse.h"
+#include "libavformat/avformat.h"
+#include "libavformat/av1.h"
+#include "thread.h"
+#include "cbs.h"
+#include "cbs_av1.h"
+
+/* Loop-restoration filter types, mirroring the AV1 specification's
+ * FrameRestorationType values. */
+typedef enum AV1FrameRestorationType {
+    AV1_RESTORE_NONE,
+    AV1_RESTORE_WIENER,
+    AV1_RESTORE_SGRPROJ,
+    AV1_RESTORE_SWITCHABLE,
+} AV1FrameRestorationType;
+
+/* One decoded frame plus the per-frame state (loop filter deltas, global
+ * motion) that a later frame may inherit from it as primary reference;
+ * see av1_frame_ref() for what gets copied when referencing. */
+typedef struct AV1Frame {
+    ThreadFrame tf;
+
+    AVBufferRef *hwaccel_priv_buf;   /* owns hwaccel_picture_private */
+    void *hwaccel_picture_private;
+
+    uint8_t loop_filter_delta_enabled;
+    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
+    int8_t  loop_filter_mode_deltas[2];
+    uint8_t gm_type[AV1_TOTAL_REFS_PER_FRAME];       /* global motion type per ref */
+    int32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];  /* global motion parameters */
+} AV1Frame;
+
+/* Byte position and geometry of a single tile inside the packet, filled in
+ * by get_tiles_info() for hwaccel slice submission. */
+typedef struct TileGroupInfo {
+    uint32_t tile_group_offset;  /* offset of the enclosing tile group in the packet */
+    uint32_t tile_offset;        /* offset of this tile's payload */
+    uint32_t tile_size;          /* payload size in bytes */
+    uint16_t tile_row;
+    uint16_t tile_column;
+    uint16_t tile_width_sbs;     /* tile width in superblocks */
+    uint16_t tile_height_sbs;    /* tile height in superblocks */
+    uint32_t tile_size_bytes;
+} TileGroupInfo;
+
+/* Decoder private context. */
+typedef struct AV1DecContext {
+    const AVClass *class;
+    AVCodecContext *avctx;
+
+    enum AVPixelFormat pix_fmt;          /* sw format derived from the sequence header */
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_obu;  /* OBUs of the packet being decoded */
+
+    /* Most recently parsed headers, copied out of the CBS fragment. */
+    AV1RawSequenceHeader raw_seq;
+    AV1RawFrameHeader raw_frame_header;
+    AV1RawTileGroup raw_tile_group;
+    /* NOTE(review): 128 looks like an ad-hoc bound -- the AV1 spec allows
+     * more tiles in total; confirm the limit or allocate dynamically. */
+    TileGroupInfo tile_group_info[128];
+    uint16_t tile_num;   /* tile_cols * tile_rows of the current frame */
+    uint16_t tg_start;
+    uint16_t tg_end;
+
+    AV1Frame ref[8];      /* DPB: one slot per AV1 reference frame */
+    AV1Frame cur_frame;   /* frame currently being decoded */
+
+} AV1DecContext;
+
+#endif /* AVCODEC_AV1DEC_H */
diff --git a/libavcodec/version.h b/libavcodec/version.h
index a3f9f828ee..5bdfdce363 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  58
-#define LIBAVCODEC_VERSION_MINOR 100
+#define LIBAVCODEC_VERSION_MINOR 101
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \