diff mbox series

[FFmpeg-devel,v2,6/6] lavc/vaapi_encode: Add VAAPI AV1 encoder

Message ID 20230718031203.87803-6-fei.w.wang@intel.com
State New
Headers show
Series [FFmpeg-devel,v2,1/6] avcodec/cbs_av1: Add tx mode enum values | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Wang, Fei W July 18, 2023, 3:12 a.m. UTC
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
update
1. set color_range in sequence header.

 Changelog                     |    1 +
 configure                     |    3 +
 doc/encoders.texi             |   13 +
 libavcodec/Makefile           |    1 +
 libavcodec/allcodecs.c        |    1 +
 libavcodec/vaapi_encode.c     |  125 +++-
 libavcodec/vaapi_encode.h     |   12 +
 libavcodec/vaapi_encode_av1.c | 1229 +++++++++++++++++++++++++++++++++
 8 files changed, 1367 insertions(+), 18 deletions(-)
 create mode 100644 libavcodec/vaapi_encode_av1.c

Comments

Neal Gompa July 31, 2023, 8:04 p.m. UTC | #1
On Tue, Jul 18, 2023 at 4:13 AM Fei Wang
<fei.w.wang-at-intel.com@ffmpeg.org> wrote:
>
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
> update
> 1. set color_range in sequence header.
>
>  Changelog                     |    1 +
>  configure                     |    3 +
>  doc/encoders.texi             |   13 +
>  libavcodec/Makefile           |    1 +
>  libavcodec/allcodecs.c        |    1 +
>  libavcodec/vaapi_encode.c     |  125 +++-
>  libavcodec/vaapi_encode.h     |   12 +
>  libavcodec/vaapi_encode_av1.c | 1229 +++++++++++++++++++++++++++++++++
>  8 files changed, 1367 insertions(+), 18 deletions(-)
>  create mode 100644 libavcodec/vaapi_encode_av1.c
>
> diff --git a/Changelog b/Changelog
> index 3876082844..7ae9b85d52 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -25,6 +25,7 @@ version <next>:
>  - Raw VVC bitstream parser, muxer and demuxer
>  - Bitstream filter for editing metadata in VVC streams
>  - Bitstream filter for converting VVC from MP4 to Annex B
> +- VAAPI AV1 encoder
>
>  version 6.0:
>  - Radiance HDR image support
> diff --git a/configure b/configure
> index b018abf139..fbc729b5c6 100755
> --- a/configure
> +++ b/configure
> @@ -3323,6 +3323,8 @@ av1_qsv_decoder_select="qsvdec"
>  av1_qsv_encoder_select="qsvenc"
>  av1_qsv_encoder_deps="libvpl"
>  av1_amf_encoder_deps="amf"
> +av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1"
> +av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
>
>  # parsers
>  aac_parser_select="adts_header mpeg4audio"
> @@ -7104,6 +7106,7 @@ if enabled vaapi; then
>      check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
>      check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
>      check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
> +    check_type "va/va.h va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
>  fi
>
>  if enabled_all opencl libdrm ; then
> diff --git a/doc/encoders.texi b/doc/encoders.texi
> index 25d6b7f09e..fb331ebd8e 100644
> --- a/doc/encoders.texi
> +++ b/doc/encoders.texi
> @@ -3991,6 +3991,19 @@ Average variable bitrate.
>  Each encoder also has its own specific options:
>  @table @option
>
> +@item av1_vaapi
> +@option{profile} sets the value of @emph{seq_profile}.
> +@option{tier} sets the value of @emph{seq_tier}.
> +@option{level} sets the value of @emph{seq_level_idx}.
> +
> +@table @option
> +@item tiles
> +Set the number of tiles to encode the input video with, as columns x rows.
> +(default is 1x1).
> +@item tile_groups
> +Set tile groups number (default is 1).
> +@end table
> +
>  @item h264_vaapi
>  @option{profile} sets the value of @emph{profile_idc} and the @emph{constraint_set*_flag}s.
>  @option{level} sets the value of @emph{level_idc}.
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3cd5997e64..fe1e6aa99d 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -259,6 +259,7 @@ OBJS-$(CONFIG_AV1_MEDIACODEC_DECODER)  += mediacodecdec.o
>  OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER)  += mediacodecenc.o
>  OBJS-$(CONFIG_AV1_NVENC_ENCODER)       += nvenc_av1.o nvenc.o
>  OBJS-$(CONFIG_AV1_QSV_ENCODER)         += qsvenc_av1.o
> +OBJS-$(CONFIG_AV1_VAAPI_ENCODER)       += vaapi_encode_av1.o av1_profile_level.o
>  OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o
>  OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
>  OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 8775d15a4f..c43c1d7b48 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -844,6 +844,7 @@ extern const FFCodec ff_av1_nvenc_encoder;
>  extern const FFCodec ff_av1_qsv_decoder;
>  extern const FFCodec ff_av1_qsv_encoder;
>  extern const FFCodec ff_av1_amf_encoder;
> +extern const FFCodec ff_av1_vaapi_encoder;
>  extern const FFCodec ff_libopenh264_encoder;
>  extern const FFCodec ff_libopenh264_decoder;
>  extern const FFCodec ff_h264_amf_encoder;
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index 2604f12b9e..2907e159fb 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -669,6 +669,15 @@ static int vaapi_encode_set_output_timestamp(AVCodecContext *avctx,
>  {
>      VAAPIEncodeContext *ctx = avctx->priv_data;
>
> +    // AV1 packs P frame and next B frame into one pkt, and uses the other
> +    // repeat frame header pkt at the display order position of the P frame
> +    // to indicate its frame index. Each frame has a corresponding pkt in its
> +    // display order position. So don't need to consider delay for AV1 timestamp.
> +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> +        pkt->dts = pkt->pts - ctx->dts_pts_diff;
> +        return 0;
> +    }
> +
>      if (ctx->output_delay == 0) {
>          pkt->dts = pkt->pts;
>      } else if (pic->encode_order < ctx->decode_delay) {
> @@ -689,9 +698,10 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>  {
>      VAAPIEncodeContext *ctx = avctx->priv_data;
>      VACodedBufferSegment *buf_list, *buf;
> -    VAStatus vas;
> +    AVPacket *pkt_ptr = pkt;
>      int total_size = 0;
>      uint8_t *ptr;
> +    VAStatus vas;
>      int err;
>
>      err = vaapi_encode_wait(avctx, pic);
> @@ -711,11 +721,52 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>      for (buf = buf_list; buf; buf = buf->next)
>          total_size += buf->size;
>
> -    err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> -    ptr = pkt->data;
> +    /** repack av1 coded frame for not display and repeat frames */
> +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> +        int display_frame = pic->display_order <= pic->encode_order;
>
> -    if (err < 0)
> -        goto fail_mapped;
> +        if (display_frame) {
> +            total_size += ctx->header_data_size;
> +            err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> +            if (err < 0)
> +                goto fail_mapped;
> +            ptr = pkt->data;
> +
> +            if (ctx->header_data_size) {
> +                memcpy(ptr, ctx->header_data, ctx->header_data_size);
> +                ptr += ctx->header_data_size;
> +                ctx->header_data_size = 0;
> +            }
> +        } else {
> +            ctx->header_data = av_realloc(ctx->header_data, total_size);
> +            if (!ctx->header_data) {
> +                err = AVERROR(ENOMEM);
> +                goto fail_mapped;
> +            }
> +            ptr = ctx->header_data;
> +            ctx->header_data_size = total_size;
> +
> +            if (pic->tail_size) {
> +                if (ctx->tail_pkt->size) {
> +                    err = AVERROR(AVERROR_BUG);
> +                    goto fail_mapped;
> +                }
> +
> +                err = ff_get_encode_buffer(avctx, ctx->tail_pkt, pic->tail_size, 0);
> +                if (err < 0)
> +                    goto fail_mapped;
> +
> +                memcpy(ctx->tail_pkt->data, pic->tail_data, pic->tail_size);
> +                pkt_ptr = ctx->tail_pkt;
> +            }
> +        }
> +    } else {
> +        err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> +        ptr = pkt->data;
> +
> +        if (err < 0)
> +            goto fail_mapped;
> +    }
>
>      for (buf = buf_list; buf; buf = buf->next) {
>          av_log(avctx, AV_LOG_DEBUG, "Output buffer: %u bytes "
> @@ -726,10 +777,10 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>      }
>
>      if (pic->type == PICTURE_TYPE_IDR)
> -        pkt->flags |= AV_PKT_FLAG_KEY;
> +        pkt_ptr->flags |= AV_PKT_FLAG_KEY;
>
> -    pkt->pts = pic->pts;
> -    pkt->duration = pic->duration;
> +    pkt_ptr->pts = pic->pts;
> +    pkt_ptr->duration = pic->duration;
>
>      vas = vaUnmapBuffer(ctx->hwctx->display, pic->output_buffer);
>      if (vas != VA_STATUS_SUCCESS) {
> @@ -742,8 +793,8 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>      // for no-delay encoders this is handled in generic codec
>      if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
>          avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
> -        pkt->opaque     = pic->opaque;
> -        pkt->opaque_ref = pic->opaque_ref;
> +        pkt_ptr->opaque     = pic->opaque;
> +        pkt_ptr->opaque_ref = pic->opaque_ref;
>          pic->opaque_ref = NULL;
>      }
>
> @@ -752,6 +803,9 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>
>      av_log(avctx, AV_LOG_DEBUG, "Output read for pic %"PRId64"/%"PRId64".\n",
>             pic->display_order, pic->encode_order);
> +
> +    vaapi_encode_set_output_timestamp(avctx, pic, pkt_ptr);
> +
>      return 0;
>
>  fail_mapped:
> @@ -1128,9 +1182,19 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
>
>      vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
>      if (pic->type != PICTURE_TYPE_IDR) {
> -        vaapi_encode_add_ref(avctx, pic, start,
> -                             pic->type == PICTURE_TYPE_P,
> -                             b_counter > 0, 0);
> +        // TODO: apply both previous and forward multi reference for all vaapi encoders.
> +        // And L0/L1 reference frame number can be set dynamically through query
> +        // VAConfigAttribEncMaxRefFrames attribute.
> +        if (avctx->codec_id == AV_CODEC_ID_AV1) {
> +            for (i = 0; i < ctx->nb_next_prev; i++)
> +                vaapi_encode_add_ref(avctx, pic, ctx->next_prev[i],
> +                                     pic->type == PICTURE_TYPE_P,
> +                                     b_counter > 0, 0);
> +        } else
> +            vaapi_encode_add_ref(avctx, pic, start,
> +                                 pic->type == PICTURE_TYPE_P,
> +                                 b_counter > 0, 0);
> +
>          vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx->nb_next_prev - 1], 0, 0, 1);
>      }
>
> @@ -1292,6 +1356,19 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>      AVFrame *frame = ctx->frame;
>      int err;
>
> +start:
> +    /** if no B frame before repeat P frame, sent repeat P frame out. */
> +    if (avctx->codec_id == AV_CODEC_ID_AV1 && ctx->tail_pkt->size) {
> +        for (VAAPIEncodePicture *tmp = ctx->pic_start; tmp; tmp = tmp->next) {
> +            if (tmp->type == PICTURE_TYPE_B && tmp->pts < ctx->tail_pkt->pts)
> +                break;
> +            else if (!tmp->next) {
> +                av_packet_move_ref(pkt, ctx->tail_pkt);
> +                goto end;
> +            }
> +        }
> +    }
> +
>      err = ff_encode_get_frame(avctx, frame);
>      if (err < 0 && err != AVERROR_EOF)
>          return err;
> @@ -1356,17 +1433,21 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>          return err;
>      }
>
> -    vaapi_encode_set_output_timestamp(avctx, pic, pkt);
> -    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
> -           "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> -
>      ctx->output_order = pic->encode_order;
>      vaapi_encode_clear_old(avctx);
>
> +    /** loop to get an available pkt in encoder flushing. */
> +    if (ctx->end_of_stream && !pkt->size)
> +        goto start;
> +
> +end:
> +    if (pkt->size)
> +        av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
> +               "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> +
>      return 0;
>  }
>
> -
>  static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx, int type,
>                                                    void *buffer, size_t size)
>  {
> @@ -2667,6 +2748,12 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
>      ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
>      ctx->hwctx = ctx->device->hwctx;
>
> +    ctx->tail_pkt = av_packet_alloc();
> +    if (!ctx->tail_pkt) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
>      err = vaapi_encode_profile_entrypoint(avctx);
>      if (err < 0)
>          goto fail;
> @@ -2859,9 +2946,11 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
>      }
>
>      av_frame_free(&ctx->frame);
> +    av_packet_free(&ctx->tail_pkt);
>
>      av_freep(&ctx->codec_sequence_params);
>      av_freep(&ctx->codec_picture_params);
> +    av_freep(&ctx->header_data);
>      av_fifo_freep2(&ctx->encode_fifo);
>
>      av_buffer_unref(&ctx->recon_frames_ref);
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index d5452a37b3..03df8d6d46 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -133,6 +133,11 @@ typedef struct VAAPIEncodePicture {
>
>      int          nb_slices;
>      VAAPIEncodeSlice *slices;
> +
> +    /** Tail data of current pic, used only for repeat header of AV1. */
> +    char tail_data[MAX_PARAM_BUFFER_SIZE];
> +    /** Byte length of tail_data. */
> +    size_t tail_size;
>  } VAAPIEncodePicture;
>
>  typedef struct VAAPIEncodeProfile {
> @@ -367,6 +372,13 @@ typedef struct VAAPIEncodeContext {
>      AVFifo          *encode_fifo;
>      // Max number of frame buffered in encoder.
>      int             async_depth;
> +
> +    /** Head data for current output pkt, used only for AV1. */
> +    void  *header_data;
> +    size_t header_data_size;
> +
> +    /** Store av1 repeat frame header pkt. */
> +    AVPacket *tail_pkt;
>  } VAAPIEncodeContext;
>
>  enum {
> diff --git a/libavcodec/vaapi_encode_av1.c b/libavcodec/vaapi_encode_av1.c
> new file mode 100644
> index 0000000000..778c5b35c7
> --- /dev/null
> +++ b/libavcodec/vaapi_encode_av1.c
> @@ -0,0 +1,1229 @@
> +/*
> + * Copyright (c) 2023 Intel Corporation
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <va/va.h>
> +#include <va/va_enc_av1.h>
> +
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/opt.h"
> +
> +#include "cbs_av1.h"
> +#include "put_bits.h"
> +#include "codec_internal.h"
> +#include "av1_profile_level.h"
> +#include "vaapi_encode.h"
> +
> +#define AV1_MAX_QUANT 255
> +
> +typedef struct VAAPIEncodeAV1Picture {
> +    int64_t last_idr_frame;
> +    int slot;
> +} VAAPIEncodeAV1Picture;
> +
> +typedef struct VAAPIEncodeAV1Context {
> +    VAAPIEncodeContext common;
> +    AV1RawOBU sh; /**< sequence header.*/
> +    AV1RawOBU fh; /**< frame header.*/
> +    CodedBitstreamContext *cbc;
> +    CodedBitstreamFragment current_obu;
> +    VAConfigAttribValEncAV1 attr;
> +    VAConfigAttribValEncAV1Ext1 attr_ext1;
> +    VAConfigAttribValEncAV1Ext2 attr_ext2;
> +
> +    char sh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded sequence header data. */
> +    size_t sh_data_len; /**< bit length of sh_data. */
> +    char fh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded frame header data. */
> +    size_t fh_data_len; /**< bit length of fh_data. */
> +
> +    uint8_t uniform_tile;
> +    uint8_t use_128x128_superblock;
> +    int sb_cols;
> +    int sb_rows;
> +    int tile_cols_log2;
> +    int tile_rows_log2;
> +    int max_tile_width_sb;
> +    int max_tile_height_sb;
> +    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
> +    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
> +
> +    int min_log2_tile_cols;
> +    int max_log2_tile_cols;
> +    int min_log2_tile_rows;
> +    int max_log2_tile_rows;
> +
> +    int q_idx_idr;
> +    int q_idx_p;
> +    int q_idx_b;
> +
> +    /** user options */
> +    int profile;
> +    int level;
> +    int tier;
> +    int tile_cols, tile_rows;
> +    int tile_groups;
> +} VAAPIEncodeAV1Context;
> +
> +static av_cold int vaapi_encode_av1_configure(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeContext     *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +    int ret;
> +
> +    ret = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, avctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (ctx->rc_mode->quality) {
> +        priv->q_idx_p = av_clip(ctx->rc_quality, 0, AV1_MAX_QUANT);
> +        if (fabs(avctx->i_quant_factor) > 0.0)
> +            priv->q_idx_idr =
> +                av_clip((fabs(avctx->i_quant_factor) * priv->q_idx_p  +
> +                         avctx->i_quant_offset) + 0.5,
> +                        0, AV1_MAX_QUANT);
> +        else
> +            priv->q_idx_idr = priv->q_idx_p;
> +
> +        if (fabs(avctx->b_quant_factor) > 0.0)
> +            priv->q_idx_b =
> +                av_clip((fabs(avctx->b_quant_factor) * priv->q_idx_p  +
> +                         avctx->b_quant_offset) + 0.5,
> +                        0, AV1_MAX_QUANT);
> +        else
> +            priv->q_idx_b = priv->q_idx_p;
> +    } else {
> +        /** Arbitrary value */
> +        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_add_obu(AVCodecContext *avctx,
> +                                    CodedBitstreamFragment *au,
> +                                    uint8_t type,
> +                                    void *obu_unit)
> +{
> +    int ret;
> +
> +    ret = ff_cbs_insert_unit_content(au, -1,
> +                                     type, obu_unit, NULL);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: "
> +               "type = %d.\n", type);
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_write_obu(AVCodecContext *avctx,
> +                                      char *data, size_t *data_len,
> +                                      CodedBitstreamFragment *bs)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +    int ret;
> +
> +    ret = ff_cbs_write_fragment_data(priv->cbc, bs);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
> +        return ret;
> +    }
> +
> +    if ((size_t)8 * MAX_PARAM_BUFFER_SIZE < 8 * bs->data_size - bs->data_bit_padding) {
> +        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
> +               "%zu < %zu.\n", (size_t)8 * MAX_PARAM_BUFFER_SIZE,
> +               8 * bs->data_size - bs->data_bit_padding);
> +        return AVERROR(ENOSPC);
> +    }
> +
> +    memcpy(data, bs->data, bs->data_size);
> +    *data_len = 8 * bs->data_size - bs->data_bit_padding;
> +
> +    return 0;
> +}
> +
> +static int get_relative_dist(const AV1RawSequenceHeader *seq,
> +                             unsigned int a, unsigned int b)
> +{
> +    unsigned int diff, m;
> +    if (!seq->enable_order_hint)
> +        return 0;
> +    diff = a - b;
> +    m = 1 << seq->order_hint_bits_minus_1;
> +    diff = (diff & (m - 1)) - (diff & m);
> +    return diff;
> +}
> +
> +static int write_ns(PutBitContext *pbc, uint32_t n, uint32_t value)
> +{
> +    uint32_t w, m, v, extra_bit;
> +
> +    w = av_log2(n) + 1;
> +    m = (1 << w) - n;
> +
> +    if (value < m) {
> +        v = value;
> +        put_bits(pbc, w - 1, v);
> +    } else {
> +        v = m + ((value - m) >> 1);
> +        extra_bit = (value - m) & 1;
> +        put_bits(pbc, w - 1, v);
> +        put_bits(pbc, 1, extra_bit);
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * This API provide the minmum implemention according current enabled features
> + * in frame header. If more features will enable in furture, please make sure
> + * the relative flags of features should be packed correctly into frame header
> + * obu in this API.
> + */
> +static int vaapi_encode_av1_write_frame_header(AVCodecContext *avctx,
> +                                               VAAPIEncodePicture *pic,
> +                                               char *data, size_t *data_len)
> +{
> +    VAAPIEncodeContext              *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> +    AV1RawOBU                    *fh_obu = &priv->fh;
> +    AV1RawOBU                    *sh_obu = &priv->sh;
> +    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
> +    AV1RawSequenceHeader             *sh = &sh_obu->obu.sequence_header;
> +    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
> +    PutBitContext pbc, pbc_tmp;
> +    uint8_t byte;
> +    int qindex, coded_lossless;
> +    int id_len, frame_is_intra, skip_mode_allowed;
> +    int start, payload_bits, obu_size, obu_size_len;
> +    int qindex_offset, loopfilter_offset;
> +    int cdef_start_offset, cdef_end_offset;
> +    int i;
> +
> +    init_put_bits(&pbc, data, MAX_PARAM_BUFFER_SIZE);
> +
> +    /** obu header */
> +    put_bits(&pbc, 1, fh_obu->header.obu_forbidden_bit);
> +    put_bits(&pbc, 4, fh_obu->header.obu_type);
> +    put_bits(&pbc, 1, fh_obu->header.obu_extension_flag);
> +    put_bits(&pbc, 1, fh_obu->header.obu_has_size_field);
> +    put_bits(&pbc, 1, fh_obu->header.obu_reserved_1bit);
> +
> +    /** record pbc status to re-write obu size later. */
> +    if (fh_obu->header.obu_has_size_field) {
> +        pbc_tmp = pbc;
> +        put_bits32(&pbc, 0);
> +        put_bits32(&pbc, 0);
> +    }
> +
> +    start = put_bits_count(&pbc);
> +
> +    /** uncompressed_header() */
> +    if (sh->frame_id_numbers_present_flag)
> +        id_len = sh->additional_frame_id_length_minus_1 +
> +                 sh->delta_frame_id_length_minus_2 + 3;
> +
> +    frame_is_intra = (fh->frame_type == AV1_FRAME_KEY ||
> +                      fh->frame_type == AV1_FRAME_INTRA_ONLY);
> +    if (!sh->reduced_still_picture_header) {
> +        put_bits(&pbc, 1, fh->show_existing_frame);
> +        if (fh->show_existing_frame) {
> +            put_bits(&pbc, 3, fh->frame_to_show_map_idx);
> +            goto trailing_bits;
> +        }
> +        if (sh->frame_id_numbers_present_flag)
> +            put_bits(&pbc, id_len, fh->display_frame_id);
> +
> +        put_bits(&pbc, 2, fh->frame_type);
> +        put_bits(&pbc, 1, fh->show_frame);
> +        if (!fh->show_frame)
> +            put_bits(&pbc, 1, fh->showable_frame);
> +
> +        if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> +            (fh->frame_type == AV1_FRAME_KEY && fh->show_frame)))
> +            put_bits(&pbc, 1, fh->error_resilient_mode);
> +    }
> +
> +    put_bits(&pbc, 1, fh->disable_cdf_update);
> +    if (sh->seq_force_screen_content_tools == AV1_SELECT_SCREEN_CONTENT_TOOLS)
> +        put_bits(&pbc, 1, fh->allow_screen_content_tools);
> +
> +    if (fh->allow_screen_content_tools && sh->seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
> +        put_bits(&pbc, 1, fh->force_integer_mv);
> +
> +    if (sh->frame_id_numbers_present_flag)
> +        put_bits(&pbc, id_len, fh->current_frame_id);
> +
> +    if (fh->frame_type != AV1_FRAME_SWITCH && !sh->reduced_still_picture_header)
> +        put_bits(&pbc, 1, fh->frame_size_override_flag);
> +
> +    if (sh->enable_order_hint)
> +        put_bits(&pbc, sh->order_hint_bits_minus_1 + 1, fh->order_hint);
> +
> +    if (!(frame_is_intra || fh->error_resilient_mode))
> +        put_bits(&pbc, 3, fh->primary_ref_frame);
> +
> +    if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> +        fh->frame_type == AV1_FRAME_KEY && fh->show_frame))
> +        put_bits(&pbc, 8, fh->refresh_frame_flags);
> +
> +    if (frame_is_intra) {
> +        /** render_size() */
> +         put_bits(&pbc, 1, fh->render_and_frame_size_different);
> +    } else {
> +        if (!frame_is_intra && sh->enable_order_hint)
> +            put_bits(&pbc, 1, fh->frame_refs_short_signaling);
> +
> +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +            if (!fh->frame_refs_short_signaling)
> +                put_bits(&pbc, 3, fh->ref_frame_idx[i]);
> +        }
> +
> +        if (!(fh->frame_size_override_flag && !fh->error_resilient_mode))
> +            put_bits(&pbc, 1, fh->render_and_frame_size_different);
> +
> +        if (!fh->force_integer_mv)
> +            put_bits(&pbc, 1, fh->allow_high_precision_mv);
> +
> +        /** read_interpolation_filter() */
> +        put_bits(&pbc, 1, fh->is_filter_switchable);
> +        if (!fh->is_filter_switchable)
> +            put_bits(&pbc, 2, fh->interpolation_filter);
> +
> +        put_bits(&pbc, 1, fh->is_motion_mode_switchable);
> +    }
> +
> +    if (!(sh->reduced_still_picture_header || fh->disable_cdf_update))
> +        put_bits(&pbc, 1, fh->disable_frame_end_update_cdf);
> +
> +    /** tile_info() */
> +    put_bits(&pbc, 1, fh->uniform_tile_spacing_flag);
> +    if (fh->uniform_tile_spacing_flag) {
> +        for (i = 0; i < priv->tile_cols_log2 - priv->min_log2_tile_cols; i++) {
> +            put_bits(&pbc, 1, 1);
> +        }
> +        if (priv->tile_cols_log2 < priv->max_log2_tile_cols)
> +            put_bits(&pbc, 1, 0);
> +
> +        for (i = 0; i < priv->tile_rows_log2 - priv->min_log2_tile_rows; i++) {
> +            put_bits(&pbc, 1, 1);
> +        }
> +        if (priv->tile_rows_log2 < priv->max_log2_tile_rows)
> +            put_bits(&pbc, 1, 0);
> +    } else {
> +        int start_sb = 0;
> +        int max_width, max_height;
> +        for (i = 0; start_sb < priv->sb_cols; i++) {
> +            max_width = FFMIN(priv->sb_cols - start_sb, priv->max_tile_width_sb);
> +            write_ns(&pbc, max_width, fh->width_in_sbs_minus_1[i]);
> +            start_sb += fh->width_in_sbs_minus_1[i] + 1;
> +        }
> +
> +        start_sb = 0;
> +        for (i = 0; start_sb < priv->sb_rows; i++) {
> +            max_height = FFMIN(priv->sb_rows - start_sb, priv->max_tile_height_sb);
> +            write_ns(&pbc, max_height, fh->height_in_sbs_minus_1[i]);
> +            start_sb += fh->height_in_sbs_minus_1[i] + 1;
> +        }
> +    }
> +
> +    if (priv->tile_cols_log2 || priv->tile_rows_log2) {
> +        put_bits(&pbc, priv->tile_cols_log2 + priv->tile_rows_log2, fh->context_update_tile_id);
> +        put_bits(&pbc, 2, fh->tile_size_bytes_minus1);
> +    }
> +
> +    qindex_offset = put_bits_count(&pbc);
> +    /** quantization_params() */
> +    put_bits(&pbc, 8, fh->base_q_idx);
> +    put_bits(&pbc, 1, fh->delta_q_y_dc);
> +    put_bits(&pbc, 1, fh->delta_q_u_dc);
> +    put_bits(&pbc, 1, fh->delta_q_u_ac);
> +    put_bits(&pbc, 1, fh->using_qmatrix);
> +
> +    /** segmentation_params() */
> +    put_bits(&pbc, 1, fh->segmentation_enabled);
> +
> +    /** delta_q_params() */
> +    if (fh->base_q_idx)
> +        put_bits(&pbc, 1, fh->delta_q_present);
> +    if (fh->delta_q_present)
> +        put_bits(&pbc, 2, fh->delta_q_res);
> +
> +    /** delta_lf_params() */
> +    if (fh->delta_q_present) {
> +        if (!fh->allow_intrabc)
> +            put_bits(&pbc, 1, fh->delta_lf_present);
> +        if (fh->delta_lf_present) {
> +            put_bits(&pbc, 2, fh->delta_lf_res);
> +            put_bits(&pbc, 1, fh->delta_lf_multi);
> +        }
> +    }
> +
> +    /** codelossless */
> +    coded_lossless = 1;
> +    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
> +        if (fh->segmentation_enabled && fh->feature_enabled[i][AV1_SEG_LVL_ALT_Q])
> +            qindex = fh->base_q_idx + fh->feature_value[i][AV1_SEG_LVL_ALT_Q];
> +        else
> +            qindex = fh->base_q_idx;
> +        qindex = av_clip_uintp2(qindex, 8);
> +
> +        if (qindex || fh->delta_q_y_dc || fh->delta_q_u_ac || fh->delta_q_u_dc ||
> +            fh->delta_q_v_ac || fh->delta_q_v_dc)
> +            coded_lossless = 0;
> +    }
> +
> +    loopfilter_offset = put_bits_count(&pbc);
> +    /** loop_filter_params() */
> +    if (!(coded_lossless || fh->allow_intrabc)) {
> +        put_bits(&pbc, 6, fh->loop_filter_level[0]);
> +        put_bits(&pbc, 6, fh->loop_filter_level[1]);
> +        if (fh->loop_filter_level[0] || fh->loop_filter_level[1]) {
> +            put_bits(&pbc, 6, fh->loop_filter_level[2]);
> +            put_bits(&pbc, 6, fh->loop_filter_level[3]);
> +        }
> +        put_bits(&pbc, 3, fh->loop_filter_sharpness);
> +        put_bits(&pbc, 1, fh->loop_filter_delta_enabled);
> +    }
> +
> +    cdef_start_offset = put_bits_count(&pbc);
> +    /** cdef_params() */
> +    if (!(coded_lossless || fh->allow_intrabc || !sh->enable_cdef)) {
> +        put_bits(&pbc, 2, fh->cdef_damping_minus_3);
> +        put_bits(&pbc, 2, fh->cdef_bits);
> +        for (i = 0; i < (1 << fh->cdef_bits); i++) {
> +            put_bits(&pbc, 4, fh->cdef_y_pri_strength[i]);
> +            put_bits(&pbc, 2, fh->cdef_y_sec_strength[i]);
> +            put_bits(&pbc, 4, fh->cdef_uv_pri_strength[i]);
> +            put_bits(&pbc, 2, fh->cdef_uv_sec_strength[i]);
> +        }
> +    }
> +    cdef_end_offset = put_bits_count(&pbc);
> +
> +    /** read_tx_mode() */
> +    if (fh->tx_mode == AV1_TX_MODE_SELECT)
> +        put_bits(&pbc, 1, 1);
> +    else
> +        put_bits(&pbc, 1, 0);
> +
> +    /** frame_reference_mode() */
> +    if (!frame_is_intra)
> +        put_bits(&pbc, 1, fh->reference_select);
> +
> +    /** skip_mode_params() */
> +    if (frame_is_intra || !fh->reference_select || !sh->enable_order_hint)
> +        skip_mode_allowed = 0;
> +    else {
> +        int forward_idx, backward_idx;
> +        int ref_hint, forward_hint, backward_hint;
> +
> +        forward_idx = -1;
> +        backward_idx = -1;
> +
> +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +            ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> +            if (get_relative_dist(sh, ref_hint, fh->order_hint) < 0) {
> +                if (forward_idx < 0 || get_relative_dist(sh, ref_hint, forward_idx) > 0) {
> +                    forward_idx = i;
> +                    forward_hint = ref_hint;
> +                }
> +            } else if (get_relative_dist(sh, ref_hint, fh->order_hint) > 0) {
> +                if (backward_idx < 0 || get_relative_dist(sh, ref_hint, backward_hint) < 0) {
> +                    backward_idx = i;
> +                    backward_hint = ref_hint;
> +                }
> +            }
> +        }
> +        if (forward_idx < 0)
> +            skip_mode_allowed = 0;
> +        else if (backward_idx >= 0)
> +            skip_mode_allowed = 1;
> +        else {
> +            int second_forward_idx, second_forward_hint;
> +            second_forward_idx = -1;
> +            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +                ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> +                if (get_relative_dist(sh, ref_hint, forward_hint) < 0) {
> +                    if (second_forward_idx < 0 || get_relative_dist(sh, ref_hint, second_forward_hint) > 0){
> +                        second_forward_idx = i;
> +                        second_forward_hint = ref_hint;
> +                    }
> +                }
> +            }
> +            if (second_forward_idx < 0)
> +                skip_mode_allowed = 0;
> +            else
> +                skip_mode_allowed = 1;
> +        }
> +    }
> +
> +    if (skip_mode_allowed)
> +        put_bits(&pbc, 1, fh->skip_mode_present);
> +
> +    put_bits(&pbc, 1, fh->reduced_tx_set);
> +
> +    /** global_motion_params() */
> +    if (!frame_is_intra) {
> +        for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +            put_bits(&pbc, 1, fh->is_global[i]);
> +            if (fh->is_global[i]) {
> +                put_bits(&pbc, 1, fh->is_rot_zoom[i]);
> +                if (!fh->is_rot_zoom[i])
> +                    put_bits(&pbc, 1, fh->is_translation[i]);
> +            }
> +        }
> +    }
> +
> +trailing_bits:
> +    payload_bits = put_bits_count(&pbc) - start;
> +
> +    /** trailing_bits() */
> +    put_bits(&pbc, 1, 1);
> +    obu_size = (put_bits_count(&pbc) - start + 7) / 8;
> +    for (i = 0; i < obu_size * 8 - payload_bits - 1; i++)
> +        put_bits(&pbc, 1, 0);
> +
> +    flush_put_bits(&pbc);
> +    *data_len = put_bits_count(&pbc);
> +
> +    /** update obu size in bitstream */
> +    if (fh_obu->header.obu_has_size_field) {
> +        obu_size_len = priv->attr_ext2.bits.obu_size_bytes_minus1 + 1;
> +        for (i = 0; i < obu_size_len; i++) {
> +            byte = obu_size >> (7 * i) & 0x7f;
> +            if (i < obu_size_len - 1)
> +                byte |= 0x80;
> +            put_bits(&pbc_tmp, 8, byte);
> +        }
> +        flush_put_bits(&pbc_tmp);
> +        memmove(pbc_tmp.buf_ptr, pbc_tmp.buf_ptr + (8 - obu_size_len), obu_size);
> +        *data_len -= (8 - obu_size_len) * 8;
> +    }
> +
> +    if (fh->show_existing_frame)
> +        return 0;
> +
> +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> +        vpic->min_base_qindex = av_clip(avctx->qmin, 1, AV1_MAX_QUANT);
> +        vpic->max_base_qindex = av_clip(avctx->qmax, 1, AV1_MAX_QUANT);
> +
> +        vpic->bit_offset_qindex            = qindex_offset - (8 - obu_size_len) * 8;
> +        vpic->bit_offset_loopfilter_params = loopfilter_offset - (8 - obu_size_len) * 8;
> +        vpic->bit_offset_cdef_params       = cdef_start_offset - (8 - obu_size_len) * 8;
> +        vpic->size_in_bits_cdef_params     = cdef_end_offset - cdef_start_offset;
> +        vpic->size_in_bits_frame_hdr_obu   = *data_len;
> +
> +        vpic->byte_offset_frame_hdr_obu_size = (((pic->type == PICTURE_TYPE_IDR) ?
> +                                               priv->sh_data_len / 8 : 0) +
> +                                               (fh_obu->header.obu_extension_flag ?
> +                                               2 : 1));
> +    }
> +
> +    return 0;
> +}
> +
> +static int tile_log2(int blkSize, int target) {
> +    int k;
> +    for (k = 0; (blkSize << k) < target; k++);
> +    return k;
> +}
> +
> +static int vaapi_encode_av1_set_tile(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +    int mi_cols, mi_rows, sb_shift, sb_size;
> +    int max_tile_area_sb, max_tile_area_sb_varied;
> +    int tile_width_sb, tile_height_sb, widest_tile_sb;
> +    int min_log2_tiles;
> +    int tile_rows_tmp, i;
> +
> +    if (priv->tile_cols > AV1_MAX_TILE_COLS ||
> +        priv->tile_rows > AV1_MAX_TILE_ROWS) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d, should less than %dx%d.\n",
> +               priv->tile_cols, priv->tile_rows, AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    mi_cols = 2 * ((avctx->width + 7) >> 3);
> +    mi_rows = 2 * ((avctx->height + 7) >> 3);
> +    priv->sb_cols = priv->use_128x128_superblock ?
> +                    ((mi_cols + 31) >> 5) : ((mi_cols + 15) >> 4);
> +    priv->sb_rows = priv->use_128x128_superblock ?
> +                    ((mi_rows + 31) >> 5) : ((mi_rows + 15) >> 4);
> +    sb_shift = priv->use_128x128_superblock ? 5 : 4;
> +    sb_size  = sb_shift + 2;
> +    priv->max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
> +    max_tile_area_sb = AV1_MAX_TILE_AREA  >> (2 * sb_size);
> +
> +    priv->min_log2_tile_cols = tile_log2(priv->max_tile_width_sb, priv->sb_cols);
> +    priv->max_log2_tile_cols = tile_log2(1, FFMIN(priv->sb_cols, AV1_MAX_TILE_COLS));
> +    priv->max_log2_tile_rows = tile_log2(1, FFMIN(priv->sb_rows, AV1_MAX_TILE_ROWS));
> +    min_log2_tiles = FFMAX(priv->min_log2_tile_cols,
> +                           tile_log2(max_tile_area_sb, priv->sb_rows * priv->sb_cols));
> +
> +    if (priv->tile_cols != av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols)) {
> +        priv->tile_cols = av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols);
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile cols, correct to %d.\n", priv->tile_cols);
> +    }
> +
> +    priv->tile_cols_log2 = tile_log2(1, priv->tile_cols);
> +    tile_width_sb = (priv->sb_cols + (1 << priv->tile_cols_log2) - 1) >>
> +                    priv->tile_cols_log2;
> +
> +    if (priv->tile_rows > priv->sb_rows) {
> +        priv->tile_rows = priv->sb_rows;
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
> +    }
> +
> +    for (tile_rows_tmp = priv->tile_rows; tile_rows_tmp <= priv->sb_rows && tile_rows_tmp <= AV1_MAX_TILE_ROWS; tile_rows_tmp++) {
> +        /** try uniformed tile. */
> +        priv->tile_rows_log2 = tile_log2(1, tile_rows_tmp);
> +        if ((priv->sb_cols + tile_width_sb - 1) / tile_width_sb == priv->tile_cols) {
> +            for (i = 0; i < priv->tile_cols - 1; i++)
> +                priv->width_in_sbs_minus_1[i] = tile_width_sb - 1;
> +            priv->width_in_sbs_minus_1[i] = priv->sb_cols - (priv->tile_cols - 1) * tile_width_sb - 1;
> +
> +            tile_height_sb = (priv->sb_rows + (1 << priv->tile_rows_log2) - 1) >>
> +                             priv->tile_rows_log2;
> +
> +            if ((priv->sb_rows + tile_height_sb - 1) / tile_height_sb == tile_rows_tmp &&
> +                tile_height_sb <= max_tile_area_sb / tile_width_sb) {
> +                for (i = 0; i < tile_rows_tmp - 1; i++)
> +                    priv->height_in_sbs_minus_1[i] = tile_height_sb - 1;
> +                priv->height_in_sbs_minus_1[i] = priv->sb_rows - (tile_rows_tmp - 1) * tile_height_sb - 1;
> +
> +                priv->uniform_tile = 1;
> +                priv->min_log2_tile_rows = FFMAX(min_log2_tiles - priv->tile_cols_log2, 0);
> +
> +                break;
> +            }
> +        }
> +
> +        /** try non-uniformed tile. */
> +        widest_tile_sb = 0;
> +        for (i = 0; i < priv->tile_cols; i++) {
> +            priv->width_in_sbs_minus_1[i] = (i + 1) * priv->sb_cols / priv->tile_cols - i * priv->sb_cols / priv->tile_cols - 1;
> +            widest_tile_sb = FFMAX(widest_tile_sb, priv->width_in_sbs_minus_1[i] + 1);
> +        }
> +
> +        if (min_log2_tiles)
> +            max_tile_area_sb_varied = (priv->sb_rows * priv->sb_cols) >> (min_log2_tiles + 1);
> +        else
> +            max_tile_area_sb_varied = priv->sb_rows * priv->sb_cols;
> +        priv->max_tile_height_sb = FFMAX(1, max_tile_area_sb_varied / widest_tile_sb);
> +
> +        if (tile_rows_tmp == av_clip(tile_rows_tmp, (priv->sb_rows + priv->max_tile_height_sb - 1) / priv->max_tile_height_sb, priv->sb_rows)) {
> +            for (i = 0; i < tile_rows_tmp; i++)
> +                priv->height_in_sbs_minus_1[i] = (i + 1) * priv->sb_rows / tile_rows_tmp - i * priv->sb_rows / tile_rows_tmp - 1;
> +
> +            break;
> +        }
> +    }
> +
> +    if (priv->tile_rows != tile_rows_tmp) {
> +        priv->tile_rows = tile_rows_tmp;
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
> +    }
> +
> +    /** check if tile cols/rows is supported by driver. */
> +    if (priv->attr_ext2.bits.max_tile_num_minus1) {
> +        if ((priv->tile_cols * priv->tile_rows - 1) > priv->attr_ext2.bits.max_tile_num_minus1) {
> +            av_log(avctx, AV_LOG_ERROR, "Unsupported tile num %d * %d = %d by driver, "
> +                   "should be less than %d.\n", priv->tile_cols, priv->tile_rows,
> +                   priv->tile_cols * priv->tile_rows,
> +                   priv->attr_ext2.bits.max_tile_num_minus1 + 1);
> +            return AVERROR(EINVAL);
> +        }
> +    }
> +    av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to %d/%d.\n",
> +           priv->tile_cols, priv->tile_rows);
> +
> +    /** check if tile group numbers is valid. */
> +    if (priv->tile_groups > priv->tile_cols * priv->tile_rows) {
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile groups number %d, "
> +        "correct to %d.\n", priv->tile_groups, priv->tile_cols * priv->tile_rows);
> +        priv->tile_groups = priv->tile_cols * priv->tile_rows;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_write_sequence_header(AVCodecContext *avctx,
> +                                                  char *data, size_t *data_len)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +
> +    memcpy(data, &priv->sh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
> +    *data_len = priv->sh_data_len;
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_init_sequence_params(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeContext               *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context           *priv = avctx->priv_data;
> +    AV1RawOBU                     *sh_obu = &priv->sh;
> +    AV1RawSequenceHeader              *sh = &sh_obu->obu.sequence_header;
> +    VAEncSequenceParameterBufferAV1 *vseq = ctx->codec_sequence_params;
> +    CodedBitstreamFragment           *obu = &priv->current_obu;
> +    const AVPixFmtDescriptor *desc;
> +    int ret;
> +
> +    memset(sh_obu, 0, sizeof(*sh_obu));
> +    sh_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER;
> +
> +    desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
> +    av_assert0(desc);
> +
> +    sh->seq_profile  = avctx->profile;
> +    if (!sh->seq_force_screen_content_tools)
> +        sh->seq_force_integer_mv = AV1_SELECT_INTEGER_MV;
> +    sh->frame_width_bits_minus_1  = av_log2(avctx->width);
> +    sh->frame_height_bits_minus_1 = av_log2(avctx->height);
> +    sh->max_frame_width_minus_1   = avctx->width - 1;
> +    sh->max_frame_height_minus_1  = avctx->height - 1;
> +    sh->seq_tier[0]               = priv->tier;
> +    /** enable order hint and reserve maximum 8 bits for it by default. */
> +    sh->enable_order_hint         = 1;
> +    sh->order_hint_bits_minus_1   = 7;
> +
> +    sh->color_config = (AV1RawColorConfig) {
> +        .high_bitdepth                  = desc->comp[0].depth == 8 ? 0 : 1,
> +        .color_primaries                = avctx->color_primaries,
> +        .transfer_characteristics       = avctx->color_trc,
> +        .matrix_coefficients            = avctx->colorspace,
> +        .color_description_present_flag = (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
> +                                           avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
> +                                           avctx->colorspace      != AVCOL_SPC_UNSPECIFIED),
> +        .color_range                    = avctx->color_range == AVCOL_RANGE_JPEG,
> +        .subsampling_x                  = desc->log2_chroma_w,
> +        .subsampling_y                  = desc->log2_chroma_h,
> +    };
> +
> +    if (avctx->level != FF_LEVEL_UNKNOWN) {
> +        sh->seq_level_idx[0] = avctx->level;
> +    } else {
> +        const AV1LevelDescriptor *level;
> +        float framerate;
> +
> +        if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
> +            framerate = avctx->framerate.num / avctx->framerate.den;
> +        else
> +            framerate = 0;
> +
> +        level = ff_av1_guess_level(avctx->bit_rate, priv->tier,
> +                                   ctx->surface_width, ctx->surface_height,
> +                                   priv->tile_rows * priv->tile_cols,
> +                                   priv->tile_cols, framerate);
> +        if (level) {
> +            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
> +            sh->seq_level_idx[0] = level->level_idx;
> +        } else {
> +            av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to "
> +                   "any normal level, using level 6.3 by default.\n");
> +            sh->seq_level_idx[0] = 19;
> +            sh->seq_tier[0] = 1;
> +        }
> +    }
> +    vseq->seq_profile             = sh->seq_profile;
> +    vseq->seq_level_idx           = sh->seq_level_idx[0];
> +    vseq->seq_tier                = sh->seq_tier[0];
> +    vseq->order_hint_bits_minus_1 = sh->order_hint_bits_minus_1;
> +    vseq->intra_period            = ctx->gop_size;
> +    vseq->ip_period               = ctx->b_per_p + 1;
> +
> +    vseq->seq_fields.bits.enable_order_hint = sh->enable_order_hint;
> +
> +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> +        vseq->bits_per_second = ctx->va_bit_rate;
> +        vseq->seq_fields.bits.enable_cdef = sh->enable_cdef = 1;
> +    }
> +
> +    ret = vaapi_encode_av1_add_obu(avctx, obu, AV1_OBU_SEQUENCE_HEADER, &priv->sh);
> +    if (ret < 0)
> +        goto end;
> +
> +    ret = vaapi_encode_av1_write_obu(avctx, priv->sh_data, &priv->sh_data_len, obu);
> +    if (ret < 0)
> +        goto end;
> +
> +end:
> +    ff_cbs_fragment_reset(obu);
> +    return ret;
> +}
> +
> +static int vaapi_encode_av1_init_picture_params(AVCodecContext *avctx,
> +                                                VAAPIEncodePicture *pic)
> +{
> +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> +    VAAPIEncodeAV1Picture          *hpic = pic->priv_data;
> +    AV1RawOBU                    *fh_obu = &priv->fh;
> +    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
> +    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
> +    CodedBitstreamFragment          *obu = &priv->current_obu;
> +    VAAPIEncodePicture    *ref;
> +    VAAPIEncodeAV1Picture *href;
> +    int slot, i;
> +    int ret;
> +    static const int8_t default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
> +        { 1, 0, 0, 0, -1, 0, -1, -1 };
> +
> +    memset(fh_obu, 0, sizeof(*fh_obu));
> +    pic->nb_slices = priv->tile_groups;
> +    fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> +    fh_obu->header.obu_has_size_field = 1;
> +
> +    switch (pic->type) {
> +    case PICTURE_TYPE_IDR:
> +        av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]);
> +        fh->frame_type = AV1_FRAME_KEY;
> +        fh->refresh_frame_flags = 0xFF;
> +        fh->base_q_idx = priv->q_idx_idr;
> +        hpic->slot = 0;
> +        hpic->last_idr_frame = pic->display_order;
> +        break;
> +    case PICTURE_TYPE_P:
> +        av_assert0(pic->nb_refs[0]);
> +        fh->frame_type = AV1_FRAME_INTER;
> +        fh->base_q_idx = priv->q_idx_p;
> +        ref = pic->refs[0][pic->nb_refs[0] - 1];
> +        href = ref->priv_data;
> +        hpic->slot = !href->slot;
> +        hpic->last_idr_frame = href->last_idr_frame;
> +        fh->refresh_frame_flags = 1 << hpic->slot;
> +
> +        /** set the nearest frame in L0 as all reference frame. */
> +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +            fh->ref_frame_idx[i] = href->slot;
> +        }
> +        fh->primary_ref_frame = href->slot;
> +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
> +
> +        /** set the 2nd nearest frame in L0 as Golden frame. */
> +        if (pic->nb_refs[0] > 1) {
> +            ref = pic->refs[0][pic->nb_refs[0] - 2];
> +            href = ref->priv_data;
> +            fh->ref_frame_idx[3] = href->slot;
> +            fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +            vpic->ref_frame_ctrl_l0.fields.search_idx1 = AV1_REF_FRAME_GOLDEN;
> +        }
> +        break;
> +    case PICTURE_TYPE_B:
> +        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
> +        fh->frame_type = AV1_FRAME_INTER;
> +        fh->base_q_idx = priv->q_idx_b;
> +        fh->refresh_frame_flags = 0x0;
> +        fh->reference_select = 1;
> +
> +        /** B frame will not be referenced, disable its recon frame. */
> +        vpic->picture_flags.bits.disable_frame_recon = 1;
> +
> +        /** Use LAST_FRAME and BWDREF_FRAME for reference. */
> +        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
> +        vpic->ref_frame_ctrl_l1.fields.search_idx0 = AV1_REF_FRAME_BWDREF;
> +
> +        ref                            = pic->refs[0][pic->nb_refs[0] - 1];
> +        href                           = ref->priv_data;
> +        hpic->last_idr_frame           = href->last_idr_frame;
> +        fh->primary_ref_frame          = href->slot;
> +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +        for (i = 0; i < AV1_REF_FRAME_GOLDEN; i++) {
> +            fh->ref_frame_idx[i] = href->slot;
> +        }
> +
> +        ref                            = pic->refs[1][pic->nb_refs[1] - 1];
> +        href                           = ref->priv_data;
> +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +        for (i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME; i++) {
> +            fh->ref_frame_idx[i] = href->slot;
> +        }
> +        break;
> +    default:
> +        av_assert0(0 && "invalid picture type");
> +    }
> +
> +    fh->show_frame                = pic->display_order <= pic->encode_order;
> +    fh->showable_frame            = fh->frame_type != AV1_FRAME_KEY;
> +    fh->frame_width_minus_1       = avctx->width - 1;
> +    fh->frame_height_minus_1      = avctx->height - 1;
> +    fh->render_width_minus_1      = fh->frame_width_minus_1;
> +    fh->render_height_minus_1     = fh->frame_height_minus_1;
> +    fh->order_hint                = pic->display_order - hpic->last_idr_frame;
> +    fh->tile_cols                 = priv->tile_cols;
> +    fh->tile_rows                 = priv->tile_rows;
> +    fh->tile_cols_log2            = priv->tile_cols_log2;
> +    fh->tile_rows_log2            = priv->tile_rows_log2;
> +    fh->uniform_tile_spacing_flag = priv->uniform_tile;
> +    fh->tile_size_bytes_minus1    = priv->attr_ext2.bits.tile_size_bytes_minus1;
> +    fh->reduced_tx_set            = 1;
> +
> +    /** ignore ONLY_4x4 mode for codedlossless is not fully implemented. */
> +    if (priv->attr_ext2.bits.tx_mode_support & 0x04)
> +        fh->tx_mode = AV1_TX_MODE_SELECT;
> +    else if (priv->attr_ext2.bits.tx_mode_support & 0x02)
> +        fh->tx_mode = AV1_TX_MODE_LARGEST;
> +    else
> +        return AVERROR(EINVAL);
> +
> +    for (i = 0; i < fh->tile_cols; i++)
> +        fh->width_in_sbs_minus_1[i] = vpic->width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i];
> +
> +    for (i = 0; i < fh->tile_rows; i++)
> +        fh->height_in_sbs_minus_1[i] = vpic->height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i];
> +
> +    memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +
> +    if (fh->frame_type == AV1_FRAME_KEY && fh->show_frame) {
> +        fh->error_resilient_mode = 1;
> +    }
> +
> +    if (fh->frame_type == AV1_FRAME_KEY || fh->error_resilient_mode)
> +        fh->primary_ref_frame = AV1_PRIMARY_REF_NONE;
> +
> +    vpic->base_qindex          = fh->base_q_idx;
> +    vpic->frame_width_minus_1  = fh->frame_width_minus_1;
> +    vpic->frame_height_minus_1 = fh->frame_height_minus_1;
> +    vpic->primary_ref_frame    = fh->primary_ref_frame;
> +    vpic->reconstructed_frame  = pic->recon_surface;
> +    vpic->coded_buf            = pic->output_buffer;
> +    vpic->tile_cols            = fh->tile_cols;
> +    vpic->tile_rows            = fh->tile_rows;
> +    vpic->order_hint           = fh->order_hint;
> +#if VA_CHECK_VERSION(1, 15, 0)
> +    vpic->refresh_frame_flags  = fh->refresh_frame_flags;
> +#endif
> +
> +    vpic->picture_flags.bits.enable_frame_obu     = 0;
> +    vpic->picture_flags.bits.frame_type           = fh->frame_type;
> +    vpic->picture_flags.bits.reduced_tx_set       = fh->reduced_tx_set;
> +    vpic->picture_flags.bits.error_resilient_mode = fh->error_resilient_mode;
> +
> +    /** let driver decide to use single or compound reference prediction mode. */
> +    vpic->mode_control_flags.bits.reference_mode = fh->reference_select ? 2 : 0;
> +    vpic->mode_control_flags.bits.tx_mode = fh->tx_mode;
> +
> +    vpic->tile_group_obu_hdr_info.bits.obu_has_size_field = 1;
> +
> +    /** set reference. */
> +    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
> +        vpic->ref_frame_idx[i] = fh->ref_frame_idx[i];
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
> +        vpic->reference_frames[i] = VA_INVALID_SURFACE;
> +
> +    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> +        for (int j = 0; j < pic->nb_refs[i]; j++) {
> +            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
> +
> +            slot = ((VAAPIEncodeAV1Picture*)ref_pic->priv_data)->slot;
> +            av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
> +
> +            vpic->reference_frames[slot] = ref_pic->recon_surface;
> +        }
> +    }
> +
> +    /** pack frame header, and set va params offset like bit_offset_qindex etc. */
> +    ret = vaapi_encode_av1_write_frame_header(avctx, pic, priv->fh_data, &priv->fh_data_len);
> +    if (ret < 0)
> +        goto end;
> +
> +end:
> +    ff_cbs_fragment_reset(obu);
> +    return ret;
> +}
> +
> +static int vaapi_encode_av1_init_slice_params(AVCodecContext *avctx,
> +                                              VAAPIEncodePicture *pic,
> +                                              VAAPIEncodeSlice *slice)
> +{
> +    VAAPIEncodeAV1Context      *priv = avctx->priv_data;
> +    VAEncTileGroupBufferAV1  *vslice = slice->codec_slice_params;
> +    CodedBitstreamAV1Context  *cbctx = priv->cbc->priv_data;
> +    int div;
> +
> +    /** Set tile group info. */
> +    div = priv->tile_cols * priv->tile_rows / priv->tile_groups;
> +    vslice->tg_start = slice->index * div;
> +    if (slice->index == (priv->tile_groups - 1)) {
> +        vslice->tg_end = priv->tile_cols * priv->tile_rows - 1;
> +        cbctx->seen_frame_header = 0;
> +    } else {
> +        vslice->tg_end = (slice->index + 1) * div - 1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_write_picture_header(AVCodecContext *avctx,
> +                                                 VAAPIEncodePicture *pic,
> +                                                 char *data, size_t *data_len)
> +{
> +    VAAPIEncodeAV1Context     *priv = avctx->priv_data;
> +    CodedBitstreamFragment     *obu = &priv->current_obu;
> +    AV1RawOBU               *fh_obu = &priv->fh;
> +    AV1RawFrameHeader       *rep_fh = &fh_obu->obu.frame_header;
> +    VAAPIEncodeAV1Picture *href;
> +    int ret = 0;
> +
> +    pic->tail_size = 0;
> +    /** Pack repeat frame header. */
> +    if (pic->display_order > pic->encode_order) {
> +        memset(fh_obu, 0, sizeof(*fh_obu));
> +        href = pic->refs[0][pic->nb_refs[0] - 1]->priv_data;
> +        fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> +        fh_obu->header.obu_has_size_field = 1;
> +
> +        rep_fh->show_existing_frame   = 1;
> +        rep_fh->frame_to_show_map_idx = href->slot == 0;
> +        rep_fh->frame_type            = AV1_FRAME_INTER;
> +        rep_fh->frame_width_minus_1   = avctx->width - 1;
> +        rep_fh->frame_height_minus_1  = avctx->height - 1;
> +        rep_fh->render_width_minus_1  = rep_fh->frame_width_minus_1;
> +        rep_fh->render_height_minus_1 = rep_fh->frame_height_minus_1;
> +
> +        ret = vaapi_encode_av1_write_frame_header(avctx, pic, pic->tail_data, &pic->tail_size);
> +        if (ret < 0)
> +            goto end;
> +
> +        pic->tail_size /= 8;
> +    }
> +
> +    memcpy(data, &priv->fh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
> +    *data_len = priv->fh_data_len;
> +
> +end:
> +    ff_cbs_fragment_reset(obu);
> +    return ret;
> +}
> +
> +static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
> +    { FF_PROFILE_AV1_MAIN,  8, 3, 1, 1, VAProfileAV1Profile0 },
> +    { FF_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
> +    { FF_PROFILE_UNKNOWN }
> +};
> +
> +static const VAAPIEncodeType vaapi_encode_type_av1 = {
> +    .profiles        = vaapi_encode_av1_profiles,
> +    .flags           = FLAG_B_PICTURES,
> +    .default_quality = 25,
> +    .configure       = &vaapi_encode_av1_configure,
> +
> +    .sequence_header_type  = VAEncPackedHeaderSequence,
> +    .sequence_params_size  = sizeof(VAEncSequenceParameterBufferAV1),
> +    .init_sequence_params  = &vaapi_encode_av1_init_sequence_params,
> +    .write_sequence_header = &vaapi_encode_av1_write_sequence_header,
> +
> +    .picture_priv_data_size = sizeof(VAAPIEncodeAV1Picture),
> +    .picture_header_type    = VAEncPackedHeaderPicture,
> +    .picture_params_size    = sizeof(VAEncPictureParameterBufferAV1),
> +    .init_picture_params    = &vaapi_encode_av1_init_picture_params,
> +    .write_picture_header   = &vaapi_encode_av1_write_picture_header,
> +
> +    .slice_params_size = sizeof(VAEncTileGroupBufferAV1),
> +    .init_slice_params = &vaapi_encode_av1_init_slice_params,
> +};
> +
> +static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeContext      *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context  *priv = avctx->priv_data;
> +    VAConfigAttrib attr;
> +    VAStatus vas;
> +    int ret;
> +
> +    ctx->codec = &vaapi_encode_type_av1;
> +
> +    ctx->desired_packed_headers =
> +        VA_ENC_PACKED_HEADER_SEQUENCE |
> +        VA_ENC_PACKED_HEADER_PICTURE;
> +
> +    if (avctx->profile == FF_PROFILE_UNKNOWN)
> +        avctx->profile = priv->profile;
> +    if (avctx->level == FF_LEVEL_UNKNOWN)
> +        avctx->level = priv->level;
> +
> +    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0x1f) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid level %d\n", avctx->level);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    ret = ff_vaapi_encode_init(avctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    attr.type = VAConfigAttribEncAV1;
> +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> +                                ctx->va_profile,
> +                                ctx->va_entrypoint,
> +                                &attr, 1);
> +    if (vas != VA_STATUS_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> +        return AVERROR_EXTERNAL;
> +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> +        priv->attr.value = 0;
> +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> +               "supported.\n", attr.type);
> +    } else {
> +        priv->attr.value = attr.value;
> +    }
> +
> +    attr.type = VAConfigAttribEncAV1Ext1;
> +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> +                                ctx->va_profile,
> +                                ctx->va_entrypoint,
> +                                &attr, 1);
> +    if (vas != VA_STATUS_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> +        return AVERROR_EXTERNAL;
> +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> +        priv->attr_ext1.value = 0;
> +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> +               "supported.\n", attr.type);
> +    } else {
> +        priv->attr_ext1.value = attr.value;
> +    }
> +
> +    /** This attr provides essential indicators, return error if not support. */
> +    attr.type = VAConfigAttribEncAV1Ext2;
> +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> +                                ctx->va_profile,
> +                                ctx->va_entrypoint,
> +                                &attr, 1);
> +    if (vas != VA_STATUS_SUCCESS || attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> +        return AVERROR_EXTERNAL;
> +    } else {
> +        priv->attr_ext2.value = attr.value;
> +    }
> +
> +    ret = vaapi_encode_av1_set_tile(avctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
> +static av_cold int vaapi_encode_av1_close(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +
> +    ff_cbs_fragment_free(&priv->current_obu);
> +    ff_cbs_close(&priv->cbc);
> +
> +    return ff_vaapi_encode_close(avctx);
> +}
> +
> +#define OFFSET(x) offsetof(VAAPIEncodeAV1Context, x)
> +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
> +
> +static const AVOption vaapi_encode_av1_options[] = {
> +    VAAPI_ENCODE_COMMON_OPTIONS,
> +    VAAPI_ENCODE_RC_OPTIONS,
> +    { "profile", "Set profile (seq_profile)",
> +      OFFSET(profile), AV_OPT_TYPE_INT,
> +      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff, FLAGS, "profile" },
> +
> +#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
> +    { .i64 = value }, 0, 0, FLAGS, "profile"
> +    { PROFILE("main",               FF_PROFILE_AV1_MAIN) },
> +    { PROFILE("high",               FF_PROFILE_AV1_HIGH) },
> +    { PROFILE("professional",       FF_PROFILE_AV1_PROFESSIONAL) },
> +#undef PROFILE
> +
> +    { "tier", "Set tier (seq_tier)",
> +      OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "tier" },
> +    { "main", NULL, 0, AV_OPT_TYPE_CONST,
> +      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
> +    { "high", NULL, 0, AV_OPT_TYPE_CONST,
> +      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
> +    { "level", "Set level (seq_level_idx)",
> +      OFFSET(level), AV_OPT_TYPE_INT,
> +      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0x1f, FLAGS, "level" },
> +
> +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
> +      { .i64 = value }, 0, 0, FLAGS, "level"
> +    { LEVEL("2.0",  0) },
> +    { LEVEL("2.1",  1) },
> +    { LEVEL("3.0",  4) },
> +    { LEVEL("3.1",  5) },
> +    { LEVEL("4.0",  8) },
> +    { LEVEL("4.1",  9) },
> +    { LEVEL("5.0", 12) },
> +    { LEVEL("5.1", 13) },
> +    { LEVEL("5.2", 14) },
> +    { LEVEL("5.3", 15) },
> +    { LEVEL("6.0", 16) },
> +    { LEVEL("6.1", 17) },
> +    { LEVEL("6.2", 18) },
> +    { LEVEL("6.3", 19) },
> +#undef LEVEL
> +
> +    { "tiles", "Tile columns x rows",
> +      OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1x1" }, 1, AV1_MAX_TILE_COLS, FLAGS },
> +    { "tile_groups", "Number of tile groups for encoding",
> +      OFFSET(tile_groups), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, AV1_MAX_TILE_ROWS * AV1_MAX_TILE_COLS, FLAGS },
> +
> +    { NULL },
> +};
> +
> +static const FFCodecDefault vaapi_encode_av1_defaults[] = {
> +    { "b",              "0"   },
> +    { "bf",             "7"   },
> +    { "g",              "120" },
> +    { "qmin",           "1"   },
> +    { "qmax",           "255" },
> +    { NULL },
> +};
> +
> +static const AVClass vaapi_encode_av1_class = {
> +    .class_name = "av1_vaapi",
> +    .item_name  = av_default_item_name,
> +    .option     = vaapi_encode_av1_options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +const FFCodec ff_av1_vaapi_encoder = {
> +    .p.name         = "av1_vaapi",
> +    CODEC_LONG_NAME("AV1 (VAAPI)"),
> +    .p.type         = AVMEDIA_TYPE_VIDEO,
> +    .p.id           = AV_CODEC_ID_AV1,
> +    .priv_data_size = sizeof(VAAPIEncodeAV1Context),
> +    .init           = &vaapi_encode_av1_init,
> +    FF_CODEC_RECEIVE_PACKET_CB(&ff_vaapi_encode_receive_packet),
> +    .close          = &vaapi_encode_av1_close,
> +    .p.priv_class   = &vaapi_encode_av1_class,
> +    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE |
> +                      AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
> +    .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
> +                      FF_CODEC_CAP_INIT_CLEANUP,
> +    .defaults       = vaapi_encode_av1_defaults,
> +    .p.pix_fmts = (const enum AVPixelFormat[]) {
> +        AV_PIX_FMT_VAAPI,
> +        AV_PIX_FMT_NONE,
> +    },
> +    .hw_configs     = ff_vaapi_encode_hw_configs,
> +    .p.wrapper_name = "vaapi",
> +};
> --
> 2.25.1
>

Seems sensible, but your sender address got rewritten. You might want
to add an additional

From: Fei Wang <fei.w.wang@intel.com>

at the top of your commit message body if you can't get the "From"
field in the emails to be set properly when you send to the list.

Otherwise the patch series LGTM. I don't have any hardware to exercise
this code with though, so that makes reviewing its behavior a little
tough.

Regardless, I can at least ack it.

Acked-by: Neal Gompa <ngompa13@gmail.com>




--
真実はいつも一つ!/ Always, there's only one truth!
Neal Gompa July 31, 2023, 9:51 p.m. UTC | #2
On Mon, Jul 31, 2023 at 9:04 PM Neal Gompa <ngompa13@gmail.com> wrote:
>
> On Tue, Jul 18, 2023 at 4:13 AM Fei Wang
> <fei.w.wang-at-intel.com@ffmpeg.org> wrote:
> >
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> > update
> > 1. set color_range in sequence header.
> >
> >  Changelog                     |    1 +
> >  configure                     |    3 +
> >  doc/encoders.texi             |   13 +
> >  libavcodec/Makefile           |    1 +
> >  libavcodec/allcodecs.c        |    1 +
> >  libavcodec/vaapi_encode.c     |  125 +++-
> >  libavcodec/vaapi_encode.h     |   12 +
> >  libavcodec/vaapi_encode_av1.c | 1229 +++++++++++++++++++++++++++++++++
> >  8 files changed, 1367 insertions(+), 18 deletions(-)
> >  create mode 100644 libavcodec/vaapi_encode_av1.c
> >
> > diff --git a/Changelog b/Changelog
> > index 3876082844..7ae9b85d52 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -25,6 +25,7 @@ version <next>:
> >  - Raw VVC bitstream parser, muxer and demuxer
> >  - Bitstream filter for editing metadata in VVC streams
> >  - Bitstream filter for converting VVC from MP4 to Annex B
> > +- VAAPI AV1 encoder
> >
> >  version 6.0:
> >  - Radiance HDR image support
> > diff --git a/configure b/configure
> > index b018abf139..fbc729b5c6 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3323,6 +3323,8 @@ av1_qsv_decoder_select="qsvdec"
> >  av1_qsv_encoder_select="qsvenc"
> >  av1_qsv_encoder_deps="libvpl"
> >  av1_amf_encoder_deps="amf"
> > +av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1"
> > +av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
> >
> >  # parsers
> >  aac_parser_select="adts_header mpeg4audio"
> > @@ -7104,6 +7106,7 @@ if enabled vaapi; then
> >      check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
> >      check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
> >      check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
> > +    check_type "va/va.h va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
> >  fi
> >
> >  if enabled_all opencl libdrm ; then
> > diff --git a/doc/encoders.texi b/doc/encoders.texi
> > index 25d6b7f09e..fb331ebd8e 100644
> > --- a/doc/encoders.texi
> > +++ b/doc/encoders.texi
> > @@ -3991,6 +3991,19 @@ Average variable bitrate.
> >  Each encoder also has its own specific options:
> >  @table @option
> >
> > +@item av1_vaapi
> > +@option{profile} sets the value of @emph{seq_profile}.
> > +@option{tier} sets the value of @emph{seq_tier}.
> > +@option{level} sets the value of @emph{seq_level_idx}.
> > +
> > +@table @option
> > +@item tiles
> > +Set the number of tiles to encode the input video with, as columns x rows.
> > +(default is 1x1).
> > +@item tile_groups
> > +Set tile groups number (default is 1).
> > +@end table
> > +
> >  @item h264_vaapi
> >  @option{profile} sets the value of @emph{profile_idc} and the @emph{constraint_set*_flag}s.
> >  @option{level} sets the value of @emph{level_idc}.
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 3cd5997e64..fe1e6aa99d 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -259,6 +259,7 @@ OBJS-$(CONFIG_AV1_MEDIACODEC_DECODER)  += mediacodecdec.o
> >  OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER)  += mediacodecenc.o
> >  OBJS-$(CONFIG_AV1_NVENC_ENCODER)       += nvenc_av1.o nvenc.o
> >  OBJS-$(CONFIG_AV1_QSV_ENCODER)         += qsvenc_av1.o
> > +OBJS-$(CONFIG_AV1_VAAPI_ENCODER)       += vaapi_encode_av1.o av1_profile_level.o
> >  OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o
> >  OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
> >  OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
> > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> > index 8775d15a4f..c43c1d7b48 100644
> > --- a/libavcodec/allcodecs.c
> > +++ b/libavcodec/allcodecs.c
> > @@ -844,6 +844,7 @@ extern const FFCodec ff_av1_nvenc_encoder;
> >  extern const FFCodec ff_av1_qsv_decoder;
> >  extern const FFCodec ff_av1_qsv_encoder;
> >  extern const FFCodec ff_av1_amf_encoder;
> > +extern const FFCodec ff_av1_vaapi_encoder;
> >  extern const FFCodec ff_libopenh264_encoder;
> >  extern const FFCodec ff_libopenh264_decoder;
> >  extern const FFCodec ff_h264_amf_encoder;
> > diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> > index 2604f12b9e..2907e159fb 100644
> > --- a/libavcodec/vaapi_encode.c
> > +++ b/libavcodec/vaapi_encode.c
> > @@ -669,6 +669,15 @@ static int vaapi_encode_set_output_timestamp(AVCodecContext *avctx,
> >  {
> >      VAAPIEncodeContext *ctx = avctx->priv_data;
> >
> > +    // AV1 packs P frame and next B frame into one pkt, and uses the other
> > +    // repeat frame header pkt at the display order position of the P frame
> > +    // to indicate its frame index. Each frame has a corresponding pkt in its
> > +    // display order position. So don't need to consider delay for AV1 timestamp.
> > +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > +        pkt->dts = pkt->pts - ctx->dts_pts_diff;
> > +        return 0;
> > +    }
> > +
> >      if (ctx->output_delay == 0) {
> >          pkt->dts = pkt->pts;
> >      } else if (pic->encode_order < ctx->decode_delay) {
> > @@ -689,9 +698,10 @@ static int vaapi_encode_output(AVCodecContext *avctx,
> >  {
> >      VAAPIEncodeContext *ctx = avctx->priv_data;
> >      VACodedBufferSegment *buf_list, *buf;
> > -    VAStatus vas;
> > +    AVPacket *pkt_ptr = pkt;
> >      int total_size = 0;
> >      uint8_t *ptr;
> > +    VAStatus vas;
> >      int err;
> >
> >      err = vaapi_encode_wait(avctx, pic);
> > @@ -711,11 +721,52 @@ static int vaapi_encode_output(AVCodecContext *avctx,
> >      for (buf = buf_list; buf; buf = buf->next)
> >          total_size += buf->size;
> >
> > -    err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > -    ptr = pkt->data;
> > +    /** repack av1 coded frame for not display and repeat frames */
> > +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > +        int display_frame = pic->display_order <= pic->encode_order;
> >
> > -    if (err < 0)
> > -        goto fail_mapped;
> > +        if (display_frame) {
> > +            total_size += ctx->header_data_size;
> > +            err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > +            if (err < 0)
> > +                goto fail_mapped;
> > +            ptr = pkt->data;
> > +
> > +            if (ctx->header_data_size) {
> > +                memcpy(ptr, ctx->header_data, ctx->header_data_size);
> > +                ptr += ctx->header_data_size;
> > +                ctx->header_data_size = 0;
> > +            }
> > +        } else {
> > +            ctx->header_data = av_realloc(ctx->header_data, total_size);
> > +            if (!ctx->header_data) {
> > +                err = AVERROR(ENOMEM);
> > +                goto fail_mapped;
> > +            }
> > +            ptr = ctx->header_data;
> > +            ctx->header_data_size = total_size;
> > +
> > +            if (pic->tail_size) {
> > +                if (ctx->tail_pkt->size) {
> > +                    err = AVERROR(AVERROR_BUG);
> > +                    goto fail_mapped;
> > +                }
> > +
> > +                err = ff_get_encode_buffer(avctx, ctx->tail_pkt, pic->tail_size, 0);
> > +                if (err < 0)
> > +                    goto fail_mapped;
> > +
> > +                memcpy(ctx->tail_pkt->data, pic->tail_data, pic->tail_size);
> > +                pkt_ptr = ctx->tail_pkt;
> > +            }
> > +        }
> > +    } else {
> > +        err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > +        ptr = pkt->data;
> > +
> > +        if (err < 0)
> > +            goto fail_mapped;
> > +    }
> >
> >      for (buf = buf_list; buf; buf = buf->next) {
> >          av_log(avctx, AV_LOG_DEBUG, "Output buffer: %u bytes "
> > @@ -726,10 +777,10 @@ static int vaapi_encode_output(AVCodecContext *avctx,
> >      }
> >
> >      if (pic->type == PICTURE_TYPE_IDR)
> > -        pkt->flags |= AV_PKT_FLAG_KEY;
> > +        pkt_ptr->flags |= AV_PKT_FLAG_KEY;
> >
> > -    pkt->pts = pic->pts;
> > -    pkt->duration = pic->duration;
> > +    pkt_ptr->pts = pic->pts;
> > +    pkt_ptr->duration = pic->duration;
> >
> >      vas = vaUnmapBuffer(ctx->hwctx->display, pic->output_buffer);
> >      if (vas != VA_STATUS_SUCCESS) {
> > @@ -742,8 +793,8 @@ static int vaapi_encode_output(AVCodecContext *avctx,
> >      // for no-delay encoders this is handled in generic codec
> >      if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
> >          avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
> > -        pkt->opaque     = pic->opaque;
> > -        pkt->opaque_ref = pic->opaque_ref;
> > +        pkt_ptr->opaque     = pic->opaque;
> > +        pkt_ptr->opaque_ref = pic->opaque_ref;
> >          pic->opaque_ref = NULL;
> >      }
> >
> > @@ -752,6 +803,9 @@ static int vaapi_encode_output(AVCodecContext *avctx,
> >
> >      av_log(avctx, AV_LOG_DEBUG, "Output read for pic %"PRId64"/%"PRId64".\n",
> >             pic->display_order, pic->encode_order);
> > +
> > +    vaapi_encode_set_output_timestamp(avctx, pic, pkt_ptr);
> > +
> >      return 0;
> >
> >  fail_mapped:
> > @@ -1128,9 +1182,19 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
> >
> >      vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
> >      if (pic->type != PICTURE_TYPE_IDR) {
> > -        vaapi_encode_add_ref(avctx, pic, start,
> > -                             pic->type == PICTURE_TYPE_P,
> > -                             b_counter > 0, 0);
> > +        // TODO: apply both previous and forward multi reference for all vaapi encoders.
> > +        // And L0/L1 reference frame number can be set dynamically through query
> > +        // VAConfigAttribEncMaxRefFrames attribute.
> > +        if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > +            for (i = 0; i < ctx->nb_next_prev; i++)
> > +                vaapi_encode_add_ref(avctx, pic, ctx->next_prev[i],
> > +                                     pic->type == PICTURE_TYPE_P,
> > +                                     b_counter > 0, 0);
> > +        } else
> > +            vaapi_encode_add_ref(avctx, pic, start,
> > +                                 pic->type == PICTURE_TYPE_P,
> > +                                 b_counter > 0, 0);
> > +
> >          vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx->nb_next_prev - 1], 0, 0, 1);
> >      }
> >
> > @@ -1292,6 +1356,19 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> >      AVFrame *frame = ctx->frame;
> >      int err;
> >
> > +start:
> > +    /** if no B frame before repeat P frame, sent repeat P frame out. */
> > +    if (avctx->codec_id == AV_CODEC_ID_AV1 && ctx->tail_pkt->size) {
> > +        for (VAAPIEncodePicture *tmp = ctx->pic_start; tmp; tmp = tmp->next) {
> > +            if (tmp->type == PICTURE_TYPE_B && tmp->pts < ctx->tail_pkt->pts)
> > +                break;
> > +            else if (!tmp->next) {
> > +                av_packet_move_ref(pkt, ctx->tail_pkt);
> > +                goto end;
> > +            }
> > +        }
> > +    }
> > +
> >      err = ff_encode_get_frame(avctx, frame);
> >      if (err < 0 && err != AVERROR_EOF)
> >          return err;
> > @@ -1356,17 +1433,21 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> >          return err;
> >      }
> >
> > -    vaapi_encode_set_output_timestamp(avctx, pic, pkt);
> > -    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
> > -           "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> > -
> >      ctx->output_order = pic->encode_order;
> >      vaapi_encode_clear_old(avctx);
> >
> > +    /** loop to get an available pkt in encoder flushing. */
> > +    if (ctx->end_of_stream && !pkt->size)
> > +        goto start;
> > +
> > +end:
> > +    if (pkt->size)
> > +        av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
> > +               "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> > +
> >      return 0;
> >  }
> >
> > -
> >  static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx, int type,
> >                                                    void *buffer, size_t size)
> >  {
> > @@ -2667,6 +2748,12 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
> >      ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
> >      ctx->hwctx = ctx->device->hwctx;
> >
> > +    ctx->tail_pkt = av_packet_alloc();
> > +    if (!ctx->tail_pkt) {
> > +        err = AVERROR(ENOMEM);
> > +        goto fail;
> > +    }
> > +
> >      err = vaapi_encode_profile_entrypoint(avctx);
> >      if (err < 0)
> >          goto fail;
> > @@ -2859,9 +2946,11 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
> >      }
> >
> >      av_frame_free(&ctx->frame);
> > +    av_packet_free(&ctx->tail_pkt);
> >
> >      av_freep(&ctx->codec_sequence_params);
> >      av_freep(&ctx->codec_picture_params);
> > +    av_freep(&ctx->header_data);
> >      av_fifo_freep2(&ctx->encode_fifo);
> >
> >      av_buffer_unref(&ctx->recon_frames_ref);
> > diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> > index d5452a37b3..03df8d6d46 100644
> > --- a/libavcodec/vaapi_encode.h
> > +++ b/libavcodec/vaapi_encode.h
> > @@ -133,6 +133,11 @@ typedef struct VAAPIEncodePicture {
> >
> >      int          nb_slices;
> >      VAAPIEncodeSlice *slices;
> > +
> > +    /** Tail data of current pic, used only for repeat header of AV1. */
> > +    char tail_data[MAX_PARAM_BUFFER_SIZE];
> > +    /** Byte length of tail_data. */
> > +    size_t tail_size;
> >  } VAAPIEncodePicture;
> >
> >  typedef struct VAAPIEncodeProfile {
> > @@ -367,6 +372,13 @@ typedef struct VAAPIEncodeContext {
> >      AVFifo          *encode_fifo;
> >      // Max number of frame buffered in encoder.
> >      int             async_depth;
> > +
> > +    /** Head data for current output pkt, used only for AV1. */
> > +    void  *header_data;
> > +    size_t header_data_size;
> > +
> > +    /** Store av1 repeat frame header pkt. */
> > +    AVPacket *tail_pkt;
> >  } VAAPIEncodeContext;
> >
> >  enum {
> > diff --git a/libavcodec/vaapi_encode_av1.c b/libavcodec/vaapi_encode_av1.c
> > new file mode 100644
> > index 0000000000..778c5b35c7
> > --- /dev/null
> > +++ b/libavcodec/vaapi_encode_av1.c
> > @@ -0,0 +1,1229 @@
> > +/*
> > + * Copyright (c) 2023 Intel Corporation
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#include <va/va.h>
> > +#include <va/va_enc_av1.h>
> > +
> > +#include "libavutil/pixdesc.h"
> > +#include "libavutil/opt.h"
> > +
> > +#include "cbs_av1.h"
> > +#include "put_bits.h"
> > +#include "codec_internal.h"
> > +#include "av1_profile_level.h"
> > +#include "vaapi_encode.h"
> > +
> > +#define AV1_MAX_QUANT 255
> > +
> > +typedef struct VAAPIEncodeAV1Picture {
> > +    int64_t last_idr_frame;
> > +    int slot;
> > +} VAAPIEncodeAV1Picture;
> > +
> > +typedef struct VAAPIEncodeAV1Context {
> > +    VAAPIEncodeContext common;
> > +    AV1RawOBU sh; /**< sequence header.*/
> > +    AV1RawOBU fh; /**< frame header.*/
> > +    CodedBitstreamContext *cbc;
> > +    CodedBitstreamFragment current_obu;
> > +    VAConfigAttribValEncAV1 attr;
> > +    VAConfigAttribValEncAV1Ext1 attr_ext1;
> > +    VAConfigAttribValEncAV1Ext2 attr_ext2;
> > +
> > +    char sh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded sequence header data. */
> > +    size_t sh_data_len; /**< bit length of sh_data. */
> > +    char fh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded frame header data. */
> > +    size_t fh_data_len; /**< bit length of fh_data. */
> > +
> > +    uint8_t uniform_tile;
> > +    uint8_t use_128x128_superblock;
> > +    int sb_cols;
> > +    int sb_rows;
> > +    int tile_cols_log2;
> > +    int tile_rows_log2;
> > +    int max_tile_width_sb;
> > +    int max_tile_height_sb;
> > +    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
> > +    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
> > +
> > +    int min_log2_tile_cols;
> > +    int max_log2_tile_cols;
> > +    int min_log2_tile_rows;
> > +    int max_log2_tile_rows;
> > +
> > +    int q_idx_idr;
> > +    int q_idx_p;
> > +    int q_idx_b;
> > +
> > +    /** user options */
> > +    int profile;
> > +    int level;
> > +    int tier;
> > +    int tile_cols, tile_rows;
> > +    int tile_groups;
> > +} VAAPIEncodeAV1Context;
> > +
> > +static av_cold int vaapi_encode_av1_configure(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeContext     *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +    int ret;
> > +
> > +    ret = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    if (ctx->rc_mode->quality) {
> > +        priv->q_idx_p = av_clip(ctx->rc_quality, 0, AV1_MAX_QUANT);
> > +        if (fabs(avctx->i_quant_factor) > 0.0)
> > +            priv->q_idx_idr =
> > +                av_clip((fabs(avctx->i_quant_factor) * priv->q_idx_p  +
> > +                         avctx->i_quant_offset) + 0.5,
> > +                        0, AV1_MAX_QUANT);
> > +        else
> > +            priv->q_idx_idr = priv->q_idx_p;
> > +
> > +        if (fabs(avctx->b_quant_factor) > 0.0)
> > +            priv->q_idx_b =
> > +                av_clip((fabs(avctx->b_quant_factor) * priv->q_idx_p  +
> > +                         avctx->b_quant_offset) + 0.5,
> > +                        0, AV1_MAX_QUANT);
> > +        else
> > +            priv->q_idx_b = priv->q_idx_p;
> > +    } else {
> > +        /** Arbitrary value */
> > +        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_add_obu(AVCodecContext *avctx,
> > +                                    CodedBitstreamFragment *au,
> > +                                    uint8_t type,
> > +                                    void *obu_unit)
> > +{
> > +    int ret;
> > +
> > +    ret = ff_cbs_insert_unit_content(au, -1,
> > +                                     type, obu_unit, NULL);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: "
> > +               "type = %d.\n", type);
> > +        return ret;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_write_obu(AVCodecContext *avctx,
> > +                                      char *data, size_t *data_len,
> > +                                      CodedBitstreamFragment *bs)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +    int ret;
> > +
> > +    ret = ff_cbs_write_fragment_data(priv->cbc, bs);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
> > +        return ret;
> > +    }
> > +
> > +    if ((size_t)8 * MAX_PARAM_BUFFER_SIZE < 8 * bs->data_size - bs->data_bit_padding) {
> > +        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
> > +               "%zu < %zu.\n", (size_t)8 * MAX_PARAM_BUFFER_SIZE,
> > +               8 * bs->data_size - bs->data_bit_padding);
> > +        return AVERROR(ENOSPC);
> > +    }
> > +
> > +    memcpy(data, bs->data, bs->data_size);
> > +    *data_len = 8 * bs->data_size - bs->data_bit_padding;
> > +
> > +    return 0;
> > +}
> > +
> > +static int get_relative_dist(const AV1RawSequenceHeader *seq,
> > +                             unsigned int a, unsigned int b)
> > +{
> > +    unsigned int diff, m;
> > +    if (!seq->enable_order_hint)
> > +        return 0;
> > +    diff = a - b;
> > +    m = 1 << seq->order_hint_bits_minus_1;
> > +    diff = (diff & (m - 1)) - (diff & m);
> > +    return diff;
> > +}
> > +
> > +static int write_ns(PutBitContext *pbc, uint32_t n, uint32_t value)
> > +{
> > +    uint32_t w, m, v, extra_bit;
> > +
> > +    w = av_log2(n) + 1;
> > +    m = (1 << w) - n;
> > +
> > +    if (value < m) {
> > +        v = value;
> > +        put_bits(pbc, w - 1, v);
> > +    } else {
> > +        v = m + ((value - m) >> 1);
> > +        extra_bit = (value - m) & 1;
> > +        put_bits(pbc, w - 1, v);
> > +        put_bits(pbc, 1, extra_bit);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/**
> > + * This API provide the minmum implemention according current enabled features
> > + * in frame header. If more features will enable in furture, please make sure
> > + * the relative flags of features should be packed correctly into frame header
> > + * obu in this API.
> > + */
> > +static int vaapi_encode_av1_write_frame_header(AVCodecContext *avctx,
> > +                                               VAAPIEncodePicture *pic,
> > +                                               char *data, size_t *data_len)
> > +{
> > +    VAAPIEncodeContext              *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> > +    AV1RawOBU                    *fh_obu = &priv->fh;
> > +    AV1RawOBU                    *sh_obu = &priv->sh;
> > +    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
> > +    AV1RawSequenceHeader             *sh = &sh_obu->obu.sequence_header;
> > +    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
> > +    PutBitContext pbc, pbc_tmp;
> > +    uint8_t byte;
> > +    int qindex, coded_lossless;
> > +    int id_len, frame_is_intra, skip_mode_allowed;
> > +    int start, payload_bits, obu_size, obu_size_len;
> > +    int qindex_offset, loopfilter_offset;
> > +    int cdef_start_offset, cdef_end_offset;
> > +    int i;
> > +
> > +    init_put_bits(&pbc, data, MAX_PARAM_BUFFER_SIZE);
> > +
> > +    /** obu header */
> > +    put_bits(&pbc, 1, fh_obu->header.obu_forbidden_bit);
> > +    put_bits(&pbc, 4, fh_obu->header.obu_type);
> > +    put_bits(&pbc, 1, fh_obu->header.obu_extension_flag);
> > +    put_bits(&pbc, 1, fh_obu->header.obu_has_size_field);
> > +    put_bits(&pbc, 1, fh_obu->header.obu_reserved_1bit);
> > +
> > +    /** record pbc status to re-write obu size later. */
> > +    if (fh_obu->header.obu_has_size_field) {
> > +        pbc_tmp = pbc;
> > +        put_bits32(&pbc, 0);
> > +        put_bits32(&pbc, 0);
> > +    }
> > +
> > +    start = put_bits_count(&pbc);
> > +
> > +    /** uncompressed_header() */
> > +    if (sh->frame_id_numbers_present_flag)
> > +        id_len = sh->additional_frame_id_length_minus_1 +
> > +                 sh->delta_frame_id_length_minus_2 + 3;
> > +
> > +    frame_is_intra = (fh->frame_type == AV1_FRAME_KEY ||
> > +                      fh->frame_type == AV1_FRAME_INTRA_ONLY);
> > +    if (!sh->reduced_still_picture_header) {
> > +        put_bits(&pbc, 1, fh->show_existing_frame);
> > +        if (fh->show_existing_frame) {
> > +            put_bits(&pbc, 3, fh->frame_to_show_map_idx);
> > +            goto trailing_bits;
> > +        }
> > +        if (sh->frame_id_numbers_present_flag)
> > +            put_bits(&pbc, id_len, fh->display_frame_id);
> > +
> > +        put_bits(&pbc, 2, fh->frame_type);
> > +        put_bits(&pbc, 1, fh->show_frame);
> > +        if (!fh->show_frame)
> > +            put_bits(&pbc, 1, fh->showable_frame);
> > +
> > +        if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> > +            (fh->frame_type == AV1_FRAME_KEY && fh->show_frame)))
> > +            put_bits(&pbc, 1, fh->error_resilient_mode);
> > +    }
> > +
> > +    put_bits(&pbc, 1, fh->disable_cdf_update);
> > +    if (sh->seq_force_screen_content_tools == AV1_SELECT_SCREEN_CONTENT_TOOLS)
> > +        put_bits(&pbc, 1, fh->allow_screen_content_tools);
> > +
> > +    if (fh->allow_screen_content_tools && sh->seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
> > +        put_bits(&pbc, 1, fh->force_integer_mv);
> > +
> > +    if (sh->frame_id_numbers_present_flag)
> > +        put_bits(&pbc, id_len, fh->current_frame_id);
> > +
> > +    if (fh->frame_type != AV1_FRAME_SWITCH && !sh->reduced_still_picture_header)
> > +        put_bits(&pbc, 1, fh->frame_size_override_flag);
> > +
> > +    if (sh->enable_order_hint)
> > +        put_bits(&pbc, sh->order_hint_bits_minus_1 + 1, fh->order_hint);
> > +
> > +    if (!(frame_is_intra || fh->error_resilient_mode))
> > +        put_bits(&pbc, 3, fh->primary_ref_frame);
> > +
> > +    if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> > +        fh->frame_type == AV1_FRAME_KEY && fh->show_frame))
> > +        put_bits(&pbc, 8, fh->refresh_frame_flags);
> > +
> > +    if (frame_is_intra) {
> > +        /** render_size() */
> > +         put_bits(&pbc, 1, fh->render_and_frame_size_different);
> > +    } else {
> > +        if (!frame_is_intra && sh->enable_order_hint)
> > +            put_bits(&pbc, 1, fh->frame_refs_short_signaling);
> > +
> > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +            if (!fh->frame_refs_short_signaling)
> > +                put_bits(&pbc, 3, fh->ref_frame_idx[i]);
> > +        }
> > +
> > +        if (!(fh->frame_size_override_flag && !fh->error_resilient_mode))
> > +            put_bits(&pbc, 1, fh->render_and_frame_size_different);
> > +
> > +        if (!fh->force_integer_mv)
> > +            put_bits(&pbc, 1, fh->allow_high_precision_mv);
> > +
> > +        /** read_interpolation_filter() */
> > +        put_bits(&pbc, 1, fh->is_filter_switchable);
> > +        if (!fh->is_filter_switchable)
> > +            put_bits(&pbc, 2, fh->interpolation_filter);
> > +
> > +        put_bits(&pbc, 1, fh->is_motion_mode_switchable);
> > +    }
> > +
> > +    if (!(sh->reduced_still_picture_header || fh->disable_cdf_update))
> > +        put_bits(&pbc, 1, fh->disable_frame_end_update_cdf);
> > +
> > +    /** tile_info() */
> > +    put_bits(&pbc, 1, fh->uniform_tile_spacing_flag);
> > +    if (fh->uniform_tile_spacing_flag) {
> > +        for (i = 0; i < priv->tile_cols_log2 - priv->min_log2_tile_cols; i++) {
> > +            put_bits(&pbc, 1, 1);
> > +        }
> > +        if (priv->tile_cols_log2 < priv->max_log2_tile_cols)
> > +            put_bits(&pbc, 1, 0);
> > +
> > +        for (i = 0; i < priv->tile_rows_log2 - priv->min_log2_tile_rows; i++) {
> > +            put_bits(&pbc, 1, 1);
> > +        }
> > +        if (priv->tile_rows_log2 < priv->max_log2_tile_rows)
> > +            put_bits(&pbc, 1, 0);
> > +    } else {
> > +        int start_sb = 0;
> > +        int max_width, max_height;
> > +        for (i = 0; start_sb < priv->sb_cols; i++) {
> > +            max_width = FFMIN(priv->sb_cols - start_sb, priv->max_tile_width_sb);
> > +            write_ns(&pbc, max_width, fh->width_in_sbs_minus_1[i]);
> > +            start_sb += fh->width_in_sbs_minus_1[i] + 1;
> > +        }
> > +
> > +        start_sb = 0;
> > +        for (i = 0; start_sb < priv->sb_rows; i++) {
> > +            max_height = FFMIN(priv->sb_rows - start_sb, priv->max_tile_height_sb);
> > +            write_ns(&pbc, max_height, fh->height_in_sbs_minus_1[i]);
> > +            start_sb += fh->height_in_sbs_minus_1[i] + 1;
> > +        }
> > +    }
> > +
> > +    if (priv->tile_cols_log2 || priv->tile_rows_log2) {
> > +        put_bits(&pbc, priv->tile_cols_log2 + priv->tile_rows_log2, fh->context_update_tile_id);
> > +        put_bits(&pbc, 2, fh->tile_size_bytes_minus1);
> > +    }
> > +
> > +    qindex_offset = put_bits_count(&pbc);
> > +    /** quantization_params() */
> > +    put_bits(&pbc, 8, fh->base_q_idx);
> > +    put_bits(&pbc, 1, fh->delta_q_y_dc);
> > +    put_bits(&pbc, 1, fh->delta_q_u_dc);
> > +    put_bits(&pbc, 1, fh->delta_q_u_ac);
> > +    put_bits(&pbc, 1, fh->using_qmatrix);
> > +
> > +    /** segmentation_params() */
> > +    put_bits(&pbc, 1, fh->segmentation_enabled);
> > +
> > +    /** delta_q_params() */
> > +    if (fh->base_q_idx)
> > +        put_bits(&pbc, 1, fh->delta_q_present);
> > +    if (fh->delta_q_present)
> > +        put_bits(&pbc, 2, fh->delta_q_res);
> > +
> > +    /** delta_lf_params() */
> > +    if (fh->delta_q_present) {
> > +        if (!fh->allow_intrabc)
> > +            put_bits(&pbc, 1, fh->delta_lf_present);
> > +        if (fh->delta_lf_present) {
> > +            put_bits(&pbc, 2, fh->delta_lf_res);
> > +            put_bits(&pbc, 1, fh->delta_lf_multi);
> > +        }
> > +    }
> > +
> > +    /** codelossless */
> > +    coded_lossless = 1;
> > +    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
> > +        if (fh->segmentation_enabled && fh->feature_enabled[i][AV1_SEG_LVL_ALT_Q])
> > +            qindex = fh->base_q_idx + fh->feature_value[i][AV1_SEG_LVL_ALT_Q];
> > +        else
> > +            qindex = fh->base_q_idx;
> > +        qindex = av_clip_uintp2(qindex, 8);
> > +
> > +        if (qindex || fh->delta_q_y_dc || fh->delta_q_u_ac || fh->delta_q_u_dc ||
> > +            fh->delta_q_v_ac || fh->delta_q_v_dc)
> > +            coded_lossless = 0;
> > +    }
> > +
> > +    loopfilter_offset = put_bits_count(&pbc);
> > +    /** loop_filter_params() */
> > +    if (!(coded_lossless || fh->allow_intrabc)) {
> > +        put_bits(&pbc, 6, fh->loop_filter_level[0]);
> > +        put_bits(&pbc, 6, fh->loop_filter_level[1]);
> > +        if (fh->loop_filter_level[0] || fh->loop_filter_level[1]) {
> > +            put_bits(&pbc, 6, fh->loop_filter_level[2]);
> > +            put_bits(&pbc, 6, fh->loop_filter_level[3]);
> > +        }
> > +        put_bits(&pbc, 3, fh->loop_filter_sharpness);
> > +        put_bits(&pbc, 1, fh->loop_filter_delta_enabled);
> > +    }
> > +
> > +    cdef_start_offset = put_bits_count(&pbc);
> > +    /** cdef_params() */
> > +    if (!(coded_lossless || fh->allow_intrabc || !sh->enable_cdef)) {
> > +        put_bits(&pbc, 2, fh->cdef_damping_minus_3);
> > +        put_bits(&pbc, 2, fh->cdef_bits);
> > +        for (i = 0; i < (1 << fh->cdef_bits); i++) {
> > +            put_bits(&pbc, 4, fh->cdef_y_pri_strength[i]);
> > +            put_bits(&pbc, 2, fh->cdef_y_sec_strength[i]);
> > +            put_bits(&pbc, 4, fh->cdef_uv_pri_strength[i]);
> > +            put_bits(&pbc, 2, fh->cdef_uv_sec_strength[i]);
> > +        }
> > +    }
> > +    cdef_end_offset = put_bits_count(&pbc);
> > +
> > +    /** read_tx_mode() */
> > +    if (fh->tx_mode == AV1_TX_MODE_SELECT)
> > +        put_bits(&pbc, 1, 1);
> > +    else
> > +        put_bits(&pbc, 1, 0);
> > +
> > +    /** frame_reference_mode() */
> > +    if (!frame_is_intra)
> > +        put_bits(&pbc, 1, fh->reference_select);
> > +
> > +    /** skip_mode_params() */
> > +    if (frame_is_intra || !fh->reference_select || !sh->enable_order_hint)
> > +        skip_mode_allowed = 0;
> > +    else {
> > +        int forward_idx, backward_idx;
> > +        int ref_hint, forward_hint, backward_hint;
> > +
> > +        forward_idx = -1;
> > +        backward_idx = -1;
> > +
> > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +            ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> > +            if (get_relative_dist(sh, ref_hint, fh->order_hint) < 0) {
> > +                if (forward_idx < 0 || get_relative_dist(sh, ref_hint, forward_idx) > 0) {
> > +                    forward_idx = i;
> > +                    forward_hint = ref_hint;
> > +                }
> > +            } else if (get_relative_dist(sh, ref_hint, fh->order_hint) > 0) {
> > +                if (backward_idx < 0 || get_relative_dist(sh, ref_hint, backward_hint) < 0) {
> > +                    backward_idx = i;
> > +                    backward_hint = ref_hint;
> > +                }
> > +            }
> > +        }
> > +        if (forward_idx < 0)
> > +            skip_mode_allowed = 0;
> > +        else if (backward_idx >= 0)
> > +            skip_mode_allowed = 1;
> > +        else {
> > +            int second_forward_idx, second_forward_hint;
> > +            second_forward_idx = -1;
> > +            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +                ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> > +                if (get_relative_dist(sh, ref_hint, forward_hint) < 0) {
> > +                    if (second_forward_idx < 0 || get_relative_dist(sh, ref_hint, second_forward_hint) > 0){
> > +                        second_forward_idx = i;
> > +                        second_forward_hint = ref_hint;
> > +                    }
> > +                }
> > +            }
> > +            if (second_forward_idx < 0)
> > +                skip_mode_allowed = 0;
> > +            else
> > +                skip_mode_allowed = 1;
> > +        }
> > +    }
> > +
> > +    if (skip_mode_allowed)
> > +        put_bits(&pbc, 1, fh->skip_mode_present);
> > +
> > +    put_bits(&pbc, 1, fh->reduced_tx_set);
> > +
> > +    /** global_motion_params() */
> > +    if (!frame_is_intra) {
> > +        for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> > +            put_bits(&pbc, 1, fh->is_global[i]);
> > +            if (fh->is_global[i]) {
> > +                put_bits(&pbc, 1, fh->is_rot_zoom[i]);
> > +                if (!fh->is_rot_zoom[i])
> > +                    put_bits(&pbc, 1, fh->is_translation[i]);
> > +            }
> > +        }
> > +    }
> > +
> > +trailing_bits:
> > +    payload_bits = put_bits_count(&pbc) - start;
> > +
> > +    /** trailing_bits() */
> > +    put_bits(&pbc, 1, 1);
> > +    obu_size = (put_bits_count(&pbc) - start + 7) / 8;
> > +    for (i = 0; i < obu_size * 8 - payload_bits - 1; i++)
> > +        put_bits(&pbc, 1, 0);
> > +
> > +    flush_put_bits(&pbc);
> > +    *data_len = put_bits_count(&pbc);
> > +
> > +    /** update obu size in bitstream */
> > +    if (fh_obu->header.obu_has_size_field) {
> > +        obu_size_len = priv->attr_ext2.bits.obu_size_bytes_minus1 + 1;
> > +        for (i = 0; i < obu_size_len; i++) {
> > +            byte = obu_size >> (7 * i) & 0x7f;
> > +            if (i < obu_size_len - 1)
> > +                byte |= 0x80;
> > +            put_bits(&pbc_tmp, 8, byte);
> > +        }
> > +        flush_put_bits(&pbc_tmp);
> > +        memmove(pbc_tmp.buf_ptr, pbc_tmp.buf_ptr + (8 - obu_size_len), obu_size);
> > +        *data_len -= (8 - obu_size_len) * 8;
> > +    }
> > +
> > +    if (fh->show_existing_frame)
> > +        return 0;
> > +
> > +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> > +        vpic->min_base_qindex = av_clip(avctx->qmin, 1, AV1_MAX_QUANT);
> > +        vpic->max_base_qindex = av_clip(avctx->qmax, 1, AV1_MAX_QUANT);
> > +
> > +        vpic->bit_offset_qindex            = qindex_offset - (8 - obu_size_len) * 8;
> > +        vpic->bit_offset_loopfilter_params = loopfilter_offset - (8 - obu_size_len) * 8;
> > +        vpic->bit_offset_cdef_params       = cdef_start_offset - (8 - obu_size_len) * 8;
> > +        vpic->size_in_bits_cdef_params     = cdef_end_offset - cdef_start_offset;
> > +        vpic->size_in_bits_frame_hdr_obu   = *data_len;
> > +
> > +        vpic->byte_offset_frame_hdr_obu_size = (((pic->type == PICTURE_TYPE_IDR) ?
> > +                                               priv->sh_data_len / 8 : 0) +
> > +                                               (fh_obu->header.obu_extension_flag ?
> > +                                               2 : 1));
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int tile_log2(int blkSize, int target) {
> > +    int k;
> > +    for (k = 0; (blkSize << k) < target; k++);
> > +    return k;
> > +}
> > +
> > +static int vaapi_encode_av1_set_tile(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +    int mi_cols, mi_rows, sb_shift, sb_size;
> > +    int max_tile_area_sb, max_tile_area_sb_varied;
> > +    int tile_width_sb, tile_height_sb, widest_tile_sb;
> > +    int min_log2_tiles;
> > +    int tile_rows_tmp, i;
> > +
> > +    if (priv->tile_cols > AV1_MAX_TILE_COLS ||
> > +        priv->tile_rows > AV1_MAX_TILE_ROWS) {
> > +        av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d, should less than %dx%d.\n",
> > +               priv->tile_cols, priv->tile_rows, AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
> > +        return AVERROR(EINVAL);
> > +    }
> > +
> > +    mi_cols = 2 * ((avctx->width + 7) >> 3);
> > +    mi_rows = 2 * ((avctx->height + 7) >> 3);
> > +    priv->sb_cols = priv->use_128x128_superblock ?
> > +                    ((mi_cols + 31) >> 5) : ((mi_cols + 15) >> 4);
> > +    priv->sb_rows = priv->use_128x128_superblock ?
> > +                    ((mi_rows + 31) >> 5) : ((mi_rows + 15) >> 4);
> > +    sb_shift = priv->use_128x128_superblock ? 5 : 4;
> > +    sb_size  = sb_shift + 2;
> > +    priv->max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
> > +    max_tile_area_sb = AV1_MAX_TILE_AREA  >> (2 * sb_size);
> > +
> > +    priv->min_log2_tile_cols = tile_log2(priv->max_tile_width_sb, priv->sb_cols);
> > +    priv->max_log2_tile_cols = tile_log2(1, FFMIN(priv->sb_cols, AV1_MAX_TILE_COLS));
> > +    priv->max_log2_tile_rows = tile_log2(1, FFMIN(priv->sb_rows, AV1_MAX_TILE_ROWS));
> > +    min_log2_tiles = FFMAX(priv->min_log2_tile_cols,
> > +                           tile_log2(max_tile_area_sb, priv->sb_rows * priv->sb_cols));
> > +
> > +    if (priv->tile_cols != av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols)) {
> > +        priv->tile_cols = av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols);
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile cols, correct to %d.\n", priv->tile_cols);
> > +    }
> > +
> > +    priv->tile_cols_log2 = tile_log2(1, priv->tile_cols);
> > +    tile_width_sb = (priv->sb_cols + (1 << priv->tile_cols_log2) - 1) >>
> > +                    priv->tile_cols_log2;
> > +
> > +    if (priv->tile_rows > priv->sb_rows) {
> > +        priv->tile_rows = priv->sb_rows;
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
> > +    }
> > +
> > +    for (tile_rows_tmp = priv->tile_rows; tile_rows_tmp <= priv->sb_rows && tile_rows_tmp <= AV1_MAX_TILE_ROWS; tile_rows_tmp++) {
> > +        /** try uniformed tile. */
> > +        priv->tile_rows_log2 = tile_log2(1, tile_rows_tmp);
> > +        if ((priv->sb_cols + tile_width_sb - 1) / tile_width_sb == priv->tile_cols) {
> > +            for (i = 0; i < priv->tile_cols - 1; i++)
> > +                priv->width_in_sbs_minus_1[i] = tile_width_sb - 1;
> > +            priv->width_in_sbs_minus_1[i] = priv->sb_cols - (priv->tile_cols - 1) * tile_width_sb - 1;
> > +
> > +            tile_height_sb = (priv->sb_rows + (1 << priv->tile_rows_log2) - 1) >>
> > +                             priv->tile_rows_log2;
> > +
> > +            if ((priv->sb_rows + tile_height_sb - 1) / tile_height_sb == tile_rows_tmp &&
> > +                tile_height_sb <= max_tile_area_sb / tile_width_sb) {
> > +                for (i = 0; i < tile_rows_tmp - 1; i++)
> > +                    priv->height_in_sbs_minus_1[i] = tile_height_sb - 1;
> > +                priv->height_in_sbs_minus_1[i] = priv->sb_rows - (tile_rows_tmp - 1) * tile_height_sb - 1;
> > +
> > +                priv->uniform_tile = 1;
> > +                priv->min_log2_tile_rows = FFMAX(min_log2_tiles - priv->tile_cols_log2, 0);
> > +
> > +                break;
> > +            }
> > +        }
> > +
> > +        /** try non-uniformed tile. */
> > +        widest_tile_sb = 0;
> > +        for (i = 0; i < priv->tile_cols; i++) {
> > +            priv->width_in_sbs_minus_1[i] = (i + 1) * priv->sb_cols / priv->tile_cols - i * priv->sb_cols / priv->tile_cols - 1;
> > +            widest_tile_sb = FFMAX(widest_tile_sb, priv->width_in_sbs_minus_1[i] + 1);
> > +        }
> > +
> > +        if (min_log2_tiles)
> > +            max_tile_area_sb_varied = (priv->sb_rows * priv->sb_cols) >> (min_log2_tiles + 1);
> > +        else
> > +            max_tile_area_sb_varied = priv->sb_rows * priv->sb_cols;
> > +        priv->max_tile_height_sb = FFMAX(1, max_tile_area_sb_varied / widest_tile_sb);
> > +
> > +        if (tile_rows_tmp == av_clip(tile_rows_tmp, (priv->sb_rows + priv->max_tile_height_sb - 1) / priv->max_tile_height_sb, priv->sb_rows)) {
> > +            for (i = 0; i < tile_rows_tmp; i++)
> > +                priv->height_in_sbs_minus_1[i] = (i + 1) * priv->sb_rows / tile_rows_tmp - i * priv->sb_rows / tile_rows_tmp - 1;
> > +
> > +            break;
> > +        }
> > +    }
> > +
> > +    if (priv->tile_rows != tile_rows_tmp) {
> > +        priv->tile_rows = tile_rows_tmp;
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
> > +    }
> > +
> > +    /** check if tile cols/rows is supported by driver. */
> > +    if (priv->attr_ext2.bits.max_tile_num_minus1) {
> > +        if ((priv->tile_cols * priv->tile_rows - 1) > priv->attr_ext2.bits.max_tile_num_minus1) {
> > +            av_log(avctx, AV_LOG_ERROR, "Unsupported tile num %d * %d = %d by driver, "
> > +                   "should be less than %d.\n", priv->tile_cols, priv->tile_rows,
> > +                   priv->tile_cols * priv->tile_rows,
> > +                   priv->attr_ext2.bits.max_tile_num_minus1 + 1);
> > +            return AVERROR(EINVAL);
> > +        }
> > +    }
> > +    av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to %d/%d.\n",
> > +           priv->tile_cols, priv->tile_rows);
> > +
> > +    /** check if tile group numbers is valid. */
> > +    if (priv->tile_groups > priv->tile_cols * priv->tile_rows) {
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile groups number %d, "
> > +        "correct to %d.\n", priv->tile_groups, priv->tile_cols * priv->tile_rows);
> > +        priv->tile_groups = priv->tile_cols * priv->tile_rows;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_write_sequence_header(AVCodecContext *avctx,
> > +                                                  char *data, size_t *data_len)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +
> > +    memcpy(data, &priv->sh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
> > +    *data_len = priv->sh_data_len;
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_init_sequence_params(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeContext               *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context           *priv = avctx->priv_data;
> > +    AV1RawOBU                     *sh_obu = &priv->sh;
> > +    AV1RawSequenceHeader              *sh = &sh_obu->obu.sequence_header;
> > +    VAEncSequenceParameterBufferAV1 *vseq = ctx->codec_sequence_params;
> > +    CodedBitstreamFragment           *obu = &priv->current_obu;
> > +    const AVPixFmtDescriptor *desc;
> > +    int ret;
> > +
> > +    memset(sh_obu, 0, sizeof(*sh_obu));
> > +    sh_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER;
> > +
> > +    desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
> > +    av_assert0(desc);
> > +
> > +    sh->seq_profile  = avctx->profile;
> > +    if (!sh->seq_force_screen_content_tools)
> > +        sh->seq_force_integer_mv = AV1_SELECT_INTEGER_MV;
> > +    sh->frame_width_bits_minus_1  = av_log2(avctx->width);
> > +    sh->frame_height_bits_minus_1 = av_log2(avctx->height);
> > +    sh->max_frame_width_minus_1   = avctx->width - 1;
> > +    sh->max_frame_height_minus_1  = avctx->height - 1;
> > +    sh->seq_tier[0]               = priv->tier;
> > +    /** enable order hint and reserve maximum 8 bits for it by default. */
> > +    sh->enable_order_hint         = 1;
> > +    sh->order_hint_bits_minus_1   = 7;
> > +
> > +    sh->color_config = (AV1RawColorConfig) {
> > +        .high_bitdepth                  = desc->comp[0].depth == 8 ? 0 : 1,
> > +        .color_primaries                = avctx->color_primaries,
> > +        .transfer_characteristics       = avctx->color_trc,
> > +        .matrix_coefficients            = avctx->colorspace,
> > +        .color_description_present_flag = (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
> > +                                           avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
> > +                                           avctx->colorspace      != AVCOL_SPC_UNSPECIFIED),
> > +        .color_range                    = avctx->color_range == AVCOL_RANGE_JPEG,
> > +        .subsampling_x                  = desc->log2_chroma_w,
> > +        .subsampling_y                  = desc->log2_chroma_h,
> > +    };
> > +
> > +    if (avctx->level != FF_LEVEL_UNKNOWN) {
> > +        sh->seq_level_idx[0] = avctx->level;
> > +    } else {
> > +        const AV1LevelDescriptor *level;
> > +        float framerate;
> > +
> > +        if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
> > +            framerate = avctx->framerate.num / avctx->framerate.den;
> > +        else
> > +            framerate = 0;
> > +
> > +        level = ff_av1_guess_level(avctx->bit_rate, priv->tier,
> > +                                   ctx->surface_width, ctx->surface_height,
> > +                                   priv->tile_rows * priv->tile_cols,
> > +                                   priv->tile_cols, framerate);
> > +        if (level) {
> > +            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
> > +            sh->seq_level_idx[0] = level->level_idx;
> > +        } else {
> > +            av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to "
> > +                   "any normal level, using level 6.3 by default.\n");
> > +            sh->seq_level_idx[0] = 19;
> > +            sh->seq_tier[0] = 1;
> > +        }
> > +    }
> > +    vseq->seq_profile             = sh->seq_profile;
> > +    vseq->seq_level_idx           = sh->seq_level_idx[0];
> > +    vseq->seq_tier                = sh->seq_tier[0];
> > +    vseq->order_hint_bits_minus_1 = sh->order_hint_bits_minus_1;
> > +    vseq->intra_period            = ctx->gop_size;
> > +    vseq->ip_period               = ctx->b_per_p + 1;
> > +
> > +    vseq->seq_fields.bits.enable_order_hint = sh->enable_order_hint;
> > +
> > +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> > +        vseq->bits_per_second = ctx->va_bit_rate;
> > +        vseq->seq_fields.bits.enable_cdef = sh->enable_cdef = 1;
> > +    }
> > +
> > +    ret = vaapi_encode_av1_add_obu(avctx, obu, AV1_OBU_SEQUENCE_HEADER, &priv->sh);
> > +    if (ret < 0)
> > +        goto end;
> > +
> > +    ret = vaapi_encode_av1_write_obu(avctx, priv->sh_data, &priv->sh_data_len, obu);
> > +    if (ret < 0)
> > +        goto end;
> > +
> > +end:
> > +    ff_cbs_fragment_reset(obu);
> > +    return ret;
> > +}
> > +
> > +static int vaapi_encode_av1_init_picture_params(AVCodecContext *avctx,
> > +                                                VAAPIEncodePicture *pic)
> > +{
> > +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> > +    VAAPIEncodeAV1Picture          *hpic = pic->priv_data;
> > +    AV1RawOBU                    *fh_obu = &priv->fh;
> > +    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
> > +    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
> > +    CodedBitstreamFragment          *obu = &priv->current_obu;
> > +    VAAPIEncodePicture    *ref;
> > +    VAAPIEncodeAV1Picture *href;
> > +    int slot, i;
> > +    int ret;
> > +    static const int8_t default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
> > +        { 1, 0, 0, 0, -1, 0, -1, -1 };
> > +
> > +    memset(fh_obu, 0, sizeof(*fh_obu));
> > +    pic->nb_slices = priv->tile_groups;
> > +    fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> > +    fh_obu->header.obu_has_size_field = 1;
> > +
> > +    switch (pic->type) {
> > +    case PICTURE_TYPE_IDR:
> > +        av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]);
> > +        fh->frame_type = AV1_FRAME_KEY;
> > +        fh->refresh_frame_flags = 0xFF;
> > +        fh->base_q_idx = priv->q_idx_idr;
> > +        hpic->slot = 0;
> > +        hpic->last_idr_frame = pic->display_order;
> > +        break;
> > +    case PICTURE_TYPE_P:
> > +        av_assert0(pic->nb_refs[0]);
> > +        fh->frame_type = AV1_FRAME_INTER;
> > +        fh->base_q_idx = priv->q_idx_p;
> > +        ref = pic->refs[0][pic->nb_refs[0] - 1];
> > +        href = ref->priv_data;
> > +        hpic->slot = !href->slot;
> > +        hpic->last_idr_frame = href->last_idr_frame;
> > +        fh->refresh_frame_flags = 1 << hpic->slot;
> > +
> > +        /** set the nearest frame in L0 as all reference frame. */
> > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +            fh->ref_frame_idx[i] = href->slot;
> > +        }
> > +        fh->primary_ref_frame = href->slot;
> > +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> > +        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
> > +
> > +        /** set the 2nd nearest frame in L0 as Golden frame. */
> > +        if (pic->nb_refs[0] > 1) {
> > +            ref = pic->refs[0][pic->nb_refs[0] - 2];
> > +            href = ref->priv_data;
> > +            fh->ref_frame_idx[3] = href->slot;
> > +            fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> > +            vpic->ref_frame_ctrl_l0.fields.search_idx1 = AV1_REF_FRAME_GOLDEN;
> > +        }
> > +        break;
> > +    case PICTURE_TYPE_B:
> > +        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
> > +        fh->frame_type = AV1_FRAME_INTER;
> > +        fh->base_q_idx = priv->q_idx_b;
> > +        fh->refresh_frame_flags = 0x0;
> > +        fh->reference_select = 1;
> > +
> > +        /** B frame will not be referenced, disable its recon frame. */
> > +        vpic->picture_flags.bits.disable_frame_recon = 1;
> > +
> > +        /** Use LAST_FRAME and BWDREF_FRAME for reference. */
> > +        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
> > +        vpic->ref_frame_ctrl_l1.fields.search_idx0 = AV1_REF_FRAME_BWDREF;
> > +
> > +        ref                            = pic->refs[0][pic->nb_refs[0] - 1];
> > +        href                           = ref->priv_data;
> > +        hpic->last_idr_frame           = href->last_idr_frame;
> > +        fh->primary_ref_frame          = href->slot;
> > +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> > +        for (i = 0; i < AV1_REF_FRAME_GOLDEN; i++) {
> > +            fh->ref_frame_idx[i] = href->slot;
> > +        }
> > +
> > +        ref                            = pic->refs[1][pic->nb_refs[1] - 1];
> > +        href                           = ref->priv_data;
> > +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> > +        for (i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME; i++) {
> > +            fh->ref_frame_idx[i] = href->slot;
> > +        }
> > +        break;
> > +    default:
> > +        av_assert0(0 && "invalid picture type");
> > +    }
> > +
> > +    fh->show_frame                = pic->display_order <= pic->encode_order;
> > +    fh->showable_frame            = fh->frame_type != AV1_FRAME_KEY;
> > +    fh->frame_width_minus_1       = avctx->width - 1;
> > +    fh->frame_height_minus_1      = avctx->height - 1;
> > +    fh->render_width_minus_1      = fh->frame_width_minus_1;
> > +    fh->render_height_minus_1     = fh->frame_height_minus_1;
> > +    fh->order_hint                = pic->display_order - hpic->last_idr_frame;
> > +    fh->tile_cols                 = priv->tile_cols;
> > +    fh->tile_rows                 = priv->tile_rows;
> > +    fh->tile_cols_log2            = priv->tile_cols_log2;
> > +    fh->tile_rows_log2            = priv->tile_rows_log2;
> > +    fh->uniform_tile_spacing_flag = priv->uniform_tile;
> > +    fh->tile_size_bytes_minus1    = priv->attr_ext2.bits.tile_size_bytes_minus1;
> > +    fh->reduced_tx_set            = 1;
> > +
> > +    /** ignore ONLY_4x4 mode for codedlossless is not fully implemented. */
> > +    if (priv->attr_ext2.bits.tx_mode_support & 0x04)
> > +        fh->tx_mode = AV1_TX_MODE_SELECT;
> > +    else if (priv->attr_ext2.bits.tx_mode_support & 0x02)
> > +        fh->tx_mode = AV1_TX_MODE_LARGEST;
> > +    else
> > +        return AVERROR(EINVAL);
> > +
> > +    for (i = 0; i < fh->tile_cols; i++)
> > +        fh->width_in_sbs_minus_1[i] = vpic->width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i];
> > +
> > +    for (i = 0; i < fh->tile_rows; i++)
> > +        fh->height_in_sbs_minus_1[i] = vpic->height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i];
> > +
> > +    memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> > +
> > +    if (fh->frame_type == AV1_FRAME_KEY && fh->show_frame) {
> > +        fh->error_resilient_mode = 1;
> > +    }
> > +
> > +    if (fh->frame_type == AV1_FRAME_KEY || fh->error_resilient_mode)
> > +        fh->primary_ref_frame = AV1_PRIMARY_REF_NONE;
> > +
> > +    vpic->base_qindex          = fh->base_q_idx;
> > +    vpic->frame_width_minus_1  = fh->frame_width_minus_1;
> > +    vpic->frame_height_minus_1 = fh->frame_height_minus_1;
> > +    vpic->primary_ref_frame    = fh->primary_ref_frame;
> > +    vpic->reconstructed_frame  = pic->recon_surface;
> > +    vpic->coded_buf            = pic->output_buffer;
> > +    vpic->tile_cols            = fh->tile_cols;
> > +    vpic->tile_rows            = fh->tile_rows;
> > +    vpic->order_hint           = fh->order_hint;
> > +#if VA_CHECK_VERSION(1, 15, 0)
> > +    vpic->refresh_frame_flags  = fh->refresh_frame_flags;
> > +#endif
> > +
> > +    vpic->picture_flags.bits.enable_frame_obu     = 0;
> > +    vpic->picture_flags.bits.frame_type           = fh->frame_type;
> > +    vpic->picture_flags.bits.reduced_tx_set       = fh->reduced_tx_set;
> > +    vpic->picture_flags.bits.error_resilient_mode = fh->error_resilient_mode;
> > +
> > +    /** let driver decide to use single or compound reference prediction mode. */
> > +    vpic->mode_control_flags.bits.reference_mode = fh->reference_select ? 2 : 0;
> > +    vpic->mode_control_flags.bits.tx_mode = fh->tx_mode;
> > +
> > +    vpic->tile_group_obu_hdr_info.bits.obu_has_size_field = 1;
> > +
> > +    /** set reference. */
> > +    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
> > +        vpic->ref_frame_idx[i] = fh->ref_frame_idx[i];
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
> > +        vpic->reference_frames[i] = VA_INVALID_SURFACE;
> > +
> > +    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> > +        for (int j = 0; j < pic->nb_refs[i]; j++) {
> > +            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
> > +
> > +            slot = ((VAAPIEncodeAV1Picture*)ref_pic->priv_data)->slot;
> > +            av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
> > +
> > +            vpic->reference_frames[slot] = ref_pic->recon_surface;
> > +        }
> > +    }
> > +
> > +    /** pack frame header, and set va params offset like bit_offset_qindex etc. */
> > +    ret = vaapi_encode_av1_write_frame_header(avctx, pic, priv->fh_data, &priv->fh_data_len);
> > +    if (ret < 0)
> > +        goto end;
> > +
> > +end:
> > +    ff_cbs_fragment_reset(obu);
> > +    return ret;
> > +}
> > +
> > +static int vaapi_encode_av1_init_slice_params(AVCodecContext *avctx,
> > +                                              VAAPIEncodePicture *pic,
> > +                                              VAAPIEncodeSlice *slice)
> > +{
> > +    VAAPIEncodeAV1Context      *priv = avctx->priv_data;
> > +    VAEncTileGroupBufferAV1  *vslice = slice->codec_slice_params;
> > +    CodedBitstreamAV1Context  *cbctx = priv->cbc->priv_data;
> > +    int div;
> > +
> > +    /** Set tile group info. */
> > +    div = priv->tile_cols * priv->tile_rows / priv->tile_groups;
> > +    vslice->tg_start = slice->index * div;
> > +    if (slice->index == (priv->tile_groups - 1)) {
> > +        vslice->tg_end = priv->tile_cols * priv->tile_rows - 1;
> > +        cbctx->seen_frame_header = 0;
> > +    } else {
> > +        vslice->tg_end = (slice->index + 1) * div - 1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_write_picture_header(AVCodecContext *avctx,
> > +                                                 VAAPIEncodePicture *pic,
> > +                                                 char *data, size_t *data_len)
> > +{
> > +    VAAPIEncodeAV1Context     *priv = avctx->priv_data;
> > +    CodedBitstreamFragment     *obu = &priv->current_obu;
> > +    AV1RawOBU               *fh_obu = &priv->fh;
> > +    AV1RawFrameHeader       *rep_fh = &fh_obu->obu.frame_header;
> > +    VAAPIEncodeAV1Picture *href;
> > +    int ret = 0;
> > +
> > +    pic->tail_size = 0;
> > +    /** Pack repeat frame header. */
> > +    if (pic->display_order > pic->encode_order) {
> > +        memset(fh_obu, 0, sizeof(*fh_obu));
> > +        href = pic->refs[0][pic->nb_refs[0] - 1]->priv_data;
> > +        fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> > +        fh_obu->header.obu_has_size_field = 1;
> > +
> > +        rep_fh->show_existing_frame   = 1;
> > +        rep_fh->frame_to_show_map_idx = href->slot == 0;
> > +        rep_fh->frame_type            = AV1_FRAME_INTER;
> > +        rep_fh->frame_width_minus_1   = avctx->width - 1;
> > +        rep_fh->frame_height_minus_1  = avctx->height - 1;
> > +        rep_fh->render_width_minus_1  = rep_fh->frame_width_minus_1;
> > +        rep_fh->render_height_minus_1 = rep_fh->frame_height_minus_1;
> > +
> > +        ret = vaapi_encode_av1_write_frame_header(avctx, pic, pic->tail_data, &pic->tail_size);
> > +        if (ret < 0)
> > +            goto end;
> > +
> > +        pic->tail_size /= 8;
> > +    }
> > +
> > +    memcpy(data, &priv->fh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
> > +    *data_len = priv->fh_data_len;
> > +
> > +end:
> > +    ff_cbs_fragment_reset(obu);
> > +    return ret;
> > +}
> > +
> > +static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
> > +    { FF_PROFILE_AV1_MAIN,  8, 3, 1, 1, VAProfileAV1Profile0 },
> > +    { FF_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
> > +    { FF_PROFILE_UNKNOWN }
> > +};
> > +
> > +static const VAAPIEncodeType vaapi_encode_type_av1 = {
> > +    .profiles        = vaapi_encode_av1_profiles,
> > +    .flags           = FLAG_B_PICTURES,
> > +    .default_quality = 25,
> > +    .configure       = &vaapi_encode_av1_configure,
> > +
> > +    .sequence_header_type  = VAEncPackedHeaderSequence,
> > +    .sequence_params_size  = sizeof(VAEncSequenceParameterBufferAV1),
> > +    .init_sequence_params  = &vaapi_encode_av1_init_sequence_params,
> > +    .write_sequence_header = &vaapi_encode_av1_write_sequence_header,
> > +
> > +    .picture_priv_data_size = sizeof(VAAPIEncodeAV1Picture),
> > +    .picture_header_type    = VAEncPackedHeaderPicture,
> > +    .picture_params_size    = sizeof(VAEncPictureParameterBufferAV1),
> > +    .init_picture_params    = &vaapi_encode_av1_init_picture_params,
> > +    .write_picture_header   = &vaapi_encode_av1_write_picture_header,
> > +
> > +    .slice_params_size = sizeof(VAEncTileGroupBufferAV1),
> > +    .init_slice_params = &vaapi_encode_av1_init_slice_params,
> > +};
> > +
> > +static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeContext      *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context  *priv = avctx->priv_data;
> > +    VAConfigAttrib attr;
> > +    VAStatus vas;
> > +    int ret;
> > +
> > +    ctx->codec = &vaapi_encode_type_av1;
> > +
> > +    ctx->desired_packed_headers =
> > +        VA_ENC_PACKED_HEADER_SEQUENCE |
> > +        VA_ENC_PACKED_HEADER_PICTURE;
> > +
> > +    if (avctx->profile == FF_PROFILE_UNKNOWN)
> > +        avctx->profile = priv->profile;
> > +    if (avctx->level == FF_LEVEL_UNKNOWN)
> > +        avctx->level = priv->level;
> > +
> > +    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0x1f) {
> > +        av_log(avctx, AV_LOG_ERROR, "Invalid level %d\n", avctx->level);
> > +        return AVERROR(EINVAL);
> > +    }
> > +
> > +    ret = ff_vaapi_encode_init(avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    attr.type = VAConfigAttribEncAV1;
> > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > +                                ctx->va_profile,
> > +                                ctx->va_entrypoint,
> > +                                &attr, 1);
> > +    if (vas != VA_STATUS_SUCCESS) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> > +        return AVERROR_EXTERNAL;
> > +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> > +        priv->attr.value = 0;
> > +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> > +               "supported.\n", attr.type);
> > +    } else {
> > +        priv->attr.value = attr.value;
> > +    }
> > +
> > +    attr.type = VAConfigAttribEncAV1Ext1;
> > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > +                                ctx->va_profile,
> > +                                ctx->va_entrypoint,
> > +                                &attr, 1);
> > +    if (vas != VA_STATUS_SUCCESS) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> > +        return AVERROR_EXTERNAL;
> > +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> > +        priv->attr_ext1.value = 0;
> > +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> > +               "supported.\n", attr.type);
> > +    } else {
> > +        priv->attr_ext1.value = attr.value;
> > +    }
> > +
> > +    /** This attr provides essential indicators, return error if not support. */
> > +    attr.type = VAConfigAttribEncAV1Ext2;
> > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > +                                ctx->va_profile,
> > +                                ctx->va_entrypoint,
> > +                                &attr, 1);
> > +    if (vas != VA_STATUS_SUCCESS || attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> > +        return AVERROR_EXTERNAL;
> > +    } else {
> > +        priv->attr_ext2.value = attr.value;
> > +    }
> > +
> > +    ret = vaapi_encode_av1_set_tile(avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    return 0;
> > +}
> > +
> > +static av_cold int vaapi_encode_av1_close(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +
> > +    ff_cbs_fragment_free(&priv->current_obu);
> > +    ff_cbs_close(&priv->cbc);
> > +
> > +    return ff_vaapi_encode_close(avctx);
> > +}
> > +
> > +#define OFFSET(x) offsetof(VAAPIEncodeAV1Context, x)
> > +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
> > +
> > +static const AVOption vaapi_encode_av1_options[] = {
> > +    VAAPI_ENCODE_COMMON_OPTIONS,
> > +    VAAPI_ENCODE_RC_OPTIONS,
> > +    { "profile", "Set profile (seq_profile)",
> > +      OFFSET(profile), AV_OPT_TYPE_INT,
> > +      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff, FLAGS, "profile" },
> > +
> > +#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
> > +    { .i64 = value }, 0, 0, FLAGS, "profile"
> > +    { PROFILE("main",               FF_PROFILE_AV1_MAIN) },
> > +    { PROFILE("high",               FF_PROFILE_AV1_HIGH) },
> > +    { PROFILE("professional",       FF_PROFILE_AV1_PROFESSIONAL) },
> > +#undef PROFILE
> > +
> > +    { "tier", "Set tier (seq_tier)",
> > +      OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "tier" },
> > +    { "main", NULL, 0, AV_OPT_TYPE_CONST,
> > +      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
> > +    { "high", NULL, 0, AV_OPT_TYPE_CONST,
> > +      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
> > +    { "level", "Set level (seq_level_idx)",
> > +      OFFSET(level), AV_OPT_TYPE_INT,
> > +      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0x1f, FLAGS, "level" },
> > +
> > +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
> > +      { .i64 = value }, 0, 0, FLAGS, "level"
> > +    { LEVEL("2.0",  0) },
> > +    { LEVEL("2.1",  1) },
> > +    { LEVEL("3.0",  4) },
> > +    { LEVEL("3.1",  5) },
> > +    { LEVEL("4.0",  8) },
> > +    { LEVEL("4.1",  9) },
> > +    { LEVEL("5.0", 12) },
> > +    { LEVEL("5.1", 13) },
> > +    { LEVEL("5.2", 14) },
> > +    { LEVEL("5.3", 15) },
> > +    { LEVEL("6.0", 16) },
> > +    { LEVEL("6.1", 17) },
> > +    { LEVEL("6.2", 18) },
> > +    { LEVEL("6.3", 19) },
> > +#undef LEVEL
> > +
> > +    { "tiles", "Tile columns x rows",
> > +      OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1x1" }, 1, AV1_MAX_TILE_COLS, FLAGS },
> > +    { "tile_groups", "Number of tile groups for encoding",
> > +      OFFSET(tile_groups), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, AV1_MAX_TILE_ROWS * AV1_MAX_TILE_COLS, FLAGS },
> > +
> > +    { NULL },
> > +};
> > +
> > +static const FFCodecDefault vaapi_encode_av1_defaults[] = {
> > +    { "b",              "0"   },
> > +    { "bf",             "7"   },
> > +    { "g",              "120" },
> > +    { "qmin",           "1"   },
> > +    { "qmax",           "255" },
> > +    { NULL },
> > +};
> > +
> > +static const AVClass vaapi_encode_av1_class = {
> > +    .class_name = "av1_vaapi",
> > +    .item_name  = av_default_item_name,
> > +    .option     = vaapi_encode_av1_options,
> > +    .version    = LIBAVUTIL_VERSION_INT,
> > +};
> > +
> > +const FFCodec ff_av1_vaapi_encoder = {
> > +    .p.name         = "av1_vaapi",
> > +    CODEC_LONG_NAME("AV1 (VAAPI)"),
> > +    .p.type         = AVMEDIA_TYPE_VIDEO,
> > +    .p.id           = AV_CODEC_ID_AV1,
> > +    .priv_data_size = sizeof(VAAPIEncodeAV1Context),
> > +    .init           = &vaapi_encode_av1_init,
> > +    FF_CODEC_RECEIVE_PACKET_CB(&ff_vaapi_encode_receive_packet),
> > +    .close          = &vaapi_encode_av1_close,
> > +    .p.priv_class   = &vaapi_encode_av1_class,
> > +    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE |
> > +                      AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
> > +    .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
> > +                      FF_CODEC_CAP_INIT_CLEANUP,
> > +    .defaults       = vaapi_encode_av1_defaults,
> > +    .p.pix_fmts = (const enum AVPixelFormat[]) {
> > +        AV_PIX_FMT_VAAPI,
> > +        AV_PIX_FMT_NONE,
> > +    },
> > +    .hw_configs     = ff_vaapi_encode_hw_configs,
> > +    .p.wrapper_name = "vaapi",
> > +};
> > --
> > 2.25.1
> >
>
> Seems sensible, but your sender address got rewritten. You might want
> to add an additional
>
> From: Fei Wang <fei.w.wang@intel.com>
>
> at the top of your commit message body if you can't get the "From"
> field in the emails to be set properly when you send to the list.
>
> Otherwise the patch series LGTM. I don't have any hardware to exercise
> this code with though, so that makes reviewing its behavior a little
> tough.
>
> Regardless, I can at least ack it.
>
> Acked-by: Neal Gompa <ngompa13@gmail.com>
>

Oops, forgot to actually CC you for this.
Wang, Fei W Aug. 1, 2023, 2:54 a.m. UTC | #3
On Mon, 2023-07-31 at 22:51 +0100, Neal Gompa wrote:
> On Mon, Jul 31, 2023 at 9:04 PM Neal Gompa <ngompa13@gmail.com>
> wrote:
> > On Tue, Jul 18, 2023 at 4:13 AM Fei Wang
> > <fei.w.wang-at-intel.com@ffmpeg.org> wrote:
> > > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > > ---
> > > update
> > > 1. set color_range in sequence header.
> > > 
> > >  Changelog                     |    1 +
> > >  configure                     |    3 +
> > >  doc/encoders.texi             |   13 +
> > >  libavcodec/Makefile           |    1 +
> > >  libavcodec/allcodecs.c        |    1 +
> > >  libavcodec/vaapi_encode.c     |  125 +++-
> > >  libavcodec/vaapi_encode.h     |   12 +
> > >  libavcodec/vaapi_encode_av1.c | 1229
> > > +++++++++++++++++++++++++++++++++
> > >  8 files changed, 1367 insertions(+), 18 deletions(-)
> > >  create mode 100644 libavcodec/vaapi_encode_av1.c
> > > 
> > > diff --git a/Changelog b/Changelog
> > > index 3876082844..7ae9b85d52 100644
> > > --- a/Changelog
> > > +++ b/Changelog
> > > @@ -25,6 +25,7 @@ version <next>:
> > >  - Raw VVC bitstream parser, muxer and demuxer
> > >  - Bitstream filter for editing metadata in VVC streams
> > >  - Bitstream filter for converting VVC from MP4 to Annex B
> > > +- VAAPI AV1 encoder
> > > 
> > >  version 6.0:
> > >  - Radiance HDR image support
> > > diff --git a/configure b/configure
> > > index b018abf139..fbc729b5c6 100755
> > > --- a/configure
> > > +++ b/configure
> > > @@ -3323,6 +3323,8 @@ av1_qsv_decoder_select="qsvdec"
> > >  av1_qsv_encoder_select="qsvenc"
> > >  av1_qsv_encoder_deps="libvpl"
> > >  av1_amf_encoder_deps="amf"
> > > +av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1"
> > > +av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
> > > 
> > >  # parsers
> > >  aac_parser_select="adts_header mpeg4audio"
> > > @@ -7104,6 +7106,7 @@ if enabled vaapi; then
> > >      check_type "va/va.h va/va_enc_jpeg.h"
> > > "VAEncPictureParameterBufferJPEG"
> > >      check_type "va/va.h
> > > va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
> > >      check_type "va/va.h
> > > va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
> > > +    check_type "va/va.h
> > > va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
> > >  fi
> > > 
> > >  if enabled_all opencl libdrm ; then
> > > diff --git a/doc/encoders.texi b/doc/encoders.texi
> > > index 25d6b7f09e..fb331ebd8e 100644
> > > --- a/doc/encoders.texi
> > > +++ b/doc/encoders.texi
> > > @@ -3991,6 +3991,19 @@ Average variable bitrate.
> > >  Each encoder also has its own specific options:
> > >  @table @option
> > > 
> > > +@item av1_vaapi
> > > +@option{profile} sets the value of @emph{seq_profile}.
> > > +@option{tier} sets the value of @emph{seq_tier}.
> > > +@option{level} sets the value of @emph{seq_level_idx}.
> > > +
> > > +@table @option
> > > +@item tiles
> > > +Set the number of tiles to encode the input video with, as
> > > columns x rows.
> > > +(default is 1x1).
> > > +@item tile_groups
> > > +Set tile groups number (default is 1).
> > > +@end table
> > > +
> > >  @item h264_vaapi
> > >  @option{profile} sets the value of @emph{profile_idc} and the
> > > @emph{constraint_set*_flag}s.
> > >  @option{level} sets the value of @emph{level_idc}.
> > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > > index 3cd5997e64..fe1e6aa99d 100644
> > > --- a/libavcodec/Makefile
> > > +++ b/libavcodec/Makefile
> > > @@ -259,6 +259,7 @@ OBJS-$(CONFIG_AV1_MEDIACODEC_DECODER)  +=
> > > mediacodecdec.o
> > >  OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER)  += mediacodecenc.o
> > >  OBJS-$(CONFIG_AV1_NVENC_ENCODER)       += nvenc_av1.o nvenc.o
> > >  OBJS-$(CONFIG_AV1_QSV_ENCODER)         += qsvenc_av1.o
> > > +OBJS-$(CONFIG_AV1_VAAPI_ENCODER)       += vaapi_encode_av1.o
> > > av1_profile_level.o
> > >  OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o
> > >  OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
> > >  OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
> > > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> > > index 8775d15a4f..c43c1d7b48 100644
> > > --- a/libavcodec/allcodecs.c
> > > +++ b/libavcodec/allcodecs.c
> > > @@ -844,6 +844,7 @@ extern const FFCodec ff_av1_nvenc_encoder;
> > >  extern const FFCodec ff_av1_qsv_decoder;
> > >  extern const FFCodec ff_av1_qsv_encoder;
> > >  extern const FFCodec ff_av1_amf_encoder;
> > > +extern const FFCodec ff_av1_vaapi_encoder;
> > >  extern const FFCodec ff_libopenh264_encoder;
> > >  extern const FFCodec ff_libopenh264_decoder;
> > >  extern const FFCodec ff_h264_amf_encoder;
> > > diff --git a/libavcodec/vaapi_encode.c
> > > b/libavcodec/vaapi_encode.c
> > > index 2604f12b9e..2907e159fb 100644
> > > --- a/libavcodec/vaapi_encode.c
> > > +++ b/libavcodec/vaapi_encode.c
> > > @@ -669,6 +669,15 @@ static int
> > > vaapi_encode_set_output_timestamp(AVCodecContext *avctx,
> > >  {
> > >      VAAPIEncodeContext *ctx = avctx->priv_data;
> > > 
> > > +    // AV1 packs P frame and next B frame into one pkt, and uses
> > > the other
> > > +    // repeat frame header pkt at the display order position of
> > > the P frame
> > > +    // to indicate its frame index. Each frame has a
> > > corresponding pkt in its
> > > +    // display order position. So don't need to consider delay
> > > for AV1 timestamp.
> > > +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > > +        pkt->dts = pkt->pts - ctx->dts_pts_diff;
> > > +        return 0;
> > > +    }
> > > +
> > >      if (ctx->output_delay == 0) {
> > >          pkt->dts = pkt->pts;
> > >      } else if (pic->encode_order < ctx->decode_delay) {
> > > @@ -689,9 +698,10 @@ static int
> > > vaapi_encode_output(AVCodecContext *avctx,
> > >  {
> > >      VAAPIEncodeContext *ctx = avctx->priv_data;
> > >      VACodedBufferSegment *buf_list, *buf;
> > > -    VAStatus vas;
> > > +    AVPacket *pkt_ptr = pkt;
> > >      int total_size = 0;
> > >      uint8_t *ptr;
> > > +    VAStatus vas;
> > >      int err;
> > > 
> > >      err = vaapi_encode_wait(avctx, pic);
> > > @@ -711,11 +721,52 @@ static int
> > > vaapi_encode_output(AVCodecContext *avctx,
> > >      for (buf = buf_list; buf; buf = buf->next)
> > >          total_size += buf->size;
> > > 
> > > -    err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > > -    ptr = pkt->data;
> > > +    /** repack av1 coded frame for not display and repeat frames
> > > */
> > > +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > > +        int display_frame = pic->display_order <= pic-
> > > >encode_order;
> > > 
> > > -    if (err < 0)
> > > -        goto fail_mapped;
> > > +        if (display_frame) {
> > > +            total_size += ctx->header_data_size;
> > > +            err = ff_get_encode_buffer(avctx, pkt, total_size,
> > > 0);
> > > +            if (err < 0)
> > > +                goto fail_mapped;
> > > +            ptr = pkt->data;
> > > +
> > > +            if (ctx->header_data_size) {
> > > +                memcpy(ptr, ctx->header_data, ctx-
> > > >header_data_size);
> > > +                ptr += ctx->header_data_size;
> > > +                ctx->header_data_size = 0;
> > > +            }
> > > +        } else {
> > > +            ctx->header_data = av_realloc(ctx->header_data,
> > > total_size);
> > > +            if (!ctx->header_data) {
> > > +                err = AVERROR(ENOMEM);
> > > +                goto fail_mapped;
> > > +            }
> > > +            ptr = ctx->header_data;
> > > +            ctx->header_data_size = total_size;
> > > +
> > > +            if (pic->tail_size) {
> > > +                if (ctx->tail_pkt->size) {
> > > +                    err = AVERROR(AVERROR_BUG);
> > > +                    goto fail_mapped;
> > > +                }
> > > +
> > > +                err = ff_get_encode_buffer(avctx, ctx->tail_pkt, 
> > > pic->tail_size, 0);
> > > +                if (err < 0)
> > > +                    goto fail_mapped;
> > > +
> > > +                memcpy(ctx->tail_pkt->data, pic->tail_data, pic-
> > > >tail_size);
> > > +                pkt_ptr = ctx->tail_pkt;
> > > +            }
> > > +        }
> > > +    } else {
> > > +        err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > > +        ptr = pkt->data;
> > > +
> > > +        if (err < 0)
> > > +            goto fail_mapped;
> > > +    }
> > > 
> > >      for (buf = buf_list; buf; buf = buf->next) {
> > >          av_log(avctx, AV_LOG_DEBUG, "Output buffer: %u bytes "
> > > @@ -726,10 +777,10 @@ static int
> > > vaapi_encode_output(AVCodecContext *avctx,
> > >      }
> > > 
> > >      if (pic->type == PICTURE_TYPE_IDR)
> > > -        pkt->flags |= AV_PKT_FLAG_KEY;
> > > +        pkt_ptr->flags |= AV_PKT_FLAG_KEY;
> > > 
> > > -    pkt->pts = pic->pts;
> > > -    pkt->duration = pic->duration;
> > > +    pkt_ptr->pts = pic->pts;
> > > +    pkt_ptr->duration = pic->duration;
> > > 
> > >      vas = vaUnmapBuffer(ctx->hwctx->display, pic-
> > > >output_buffer);
> > >      if (vas != VA_STATUS_SUCCESS) {
> > > @@ -742,8 +793,8 @@ static int vaapi_encode_output(AVCodecContext
> > > *avctx,
> > >      // for no-delay encoders this is handled in generic codec
> > >      if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
> > >          avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
> > > -        pkt->opaque     = pic->opaque;
> > > -        pkt->opaque_ref = pic->opaque_ref;
> > > +        pkt_ptr->opaque     = pic->opaque;
> > > +        pkt_ptr->opaque_ref = pic->opaque_ref;
> > >          pic->opaque_ref = NULL;
> > >      }
> > > 
> > > @@ -752,6 +803,9 @@ static int vaapi_encode_output(AVCodecContext
> > > *avctx,
> > > 
> > >      av_log(avctx, AV_LOG_DEBUG, "Output read for pic
> > > %"PRId64"/%"PRId64".\n",
> > >             pic->display_order, pic->encode_order);
> > > +
> > > +    vaapi_encode_set_output_timestamp(avctx, pic, pkt_ptr);
> > > +
> > >      return 0;
> > > 
> > >  fail_mapped:
> > > @@ -1128,9 +1182,19 @@ static int
> > > vaapi_encode_pick_next(AVCodecContext *avctx,
> > > 
> > >      vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
> > >      if (pic->type != PICTURE_TYPE_IDR) {
> > > -        vaapi_encode_add_ref(avctx, pic, start,
> > > -                             pic->type == PICTURE_TYPE_P,
> > > -                             b_counter > 0, 0);
> > > +        // TODO: apply both previous and forward multi reference
> > > for all vaapi encoders.
> > > +        // And L0/L1 reference frame number can be set
> > > dynamically through query
> > > +        // VAConfigAttribEncMaxRefFrames attribute.
> > > +        if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > > +            for (i = 0; i < ctx->nb_next_prev; i++)
> > > +                vaapi_encode_add_ref(avctx, pic, ctx-
> > > >next_prev[i],
> > > +                                     pic->type ==
> > > PICTURE_TYPE_P,
> > > +                                     b_counter > 0, 0);
> > > +        } else
> > > +            vaapi_encode_add_ref(avctx, pic, start,
> > > +                                 pic->type == PICTURE_TYPE_P,
> > > +                                 b_counter > 0, 0);
> > > +
> > >          vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx-
> > > >nb_next_prev - 1], 0, 0, 1);
> > >      }
> > > 
> > > @@ -1292,6 +1356,19 @@ int
> > > ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket
> > > *pkt)
> > >      AVFrame *frame = ctx->frame;
> > >      int err;
> > > 
> > > +start:
> > > +    /** if no B frame before repeat P frame, sent repeat P frame
> > > out. */
> > > +    if (avctx->codec_id == AV_CODEC_ID_AV1 && ctx->tail_pkt-
> > > >size) {
> > > +        for (VAAPIEncodePicture *tmp = ctx->pic_start; tmp; tmp
> > > = tmp->next) {
> > > +            if (tmp->type == PICTURE_TYPE_B && tmp->pts < ctx-
> > > >tail_pkt->pts)
> > > +                break;
> > > +            else if (!tmp->next) {
> > > +                av_packet_move_ref(pkt, ctx->tail_pkt);
> > > +                goto end;
> > > +            }
> > > +        }
> > > +    }
> > > +
> > >      err = ff_encode_get_frame(avctx, frame);
> > >      if (err < 0 && err != AVERROR_EOF)
> > >          return err;
> > > @@ -1356,17 +1433,21 @@ int
> > > ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket
> > > *pkt)
> > >          return err;
> > >      }
> > > 
> > > -    vaapi_encode_set_output_timestamp(avctx, pic, pkt);
> > > -    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64",
> > > dts %"PRId64", "
> > > -           "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> > > -
> > >      ctx->output_order = pic->encode_order;
> > >      vaapi_encode_clear_old(avctx);
> > > 
> > > +    /** loop to get an available pkt in encoder flushing. */
> > > +    if (ctx->end_of_stream && !pkt->size)
> > > +        goto start;
> > > +
> > > +end:
> > > +    if (pkt->size)
> > > +        av_log(avctx, AV_LOG_DEBUG, "Output packet: pts
> > > %"PRId64", dts %"PRId64", "
> > > +               "size %u bytes.\n", pkt->pts, pkt->dts, pkt-
> > > >size);
> > > +
> > >      return 0;
> > >  }
> > > 
> > > -
> > >  static av_cold void vaapi_encode_add_global_param(AVCodecContext
> > > *avctx, int type,
> > >                                                    void *buffer,
> > > size_t size)
> > >  {
> > > @@ -2667,6 +2748,12 @@ av_cold int
> > > ff_vaapi_encode_init(AVCodecContext *avctx)
> > >      ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
> > >      ctx->hwctx = ctx->device->hwctx;
> > > 
> > > +    ctx->tail_pkt = av_packet_alloc();
> > > +    if (!ctx->tail_pkt) {
> > > +        err = AVERROR(ENOMEM);
> > > +        goto fail;
> > > +    }
> > > +
> > >      err = vaapi_encode_profile_entrypoint(avctx);
> > >      if (err < 0)
> > >          goto fail;
> > > @@ -2859,9 +2946,11 @@ av_cold int
> > > ff_vaapi_encode_close(AVCodecContext *avctx)
> > >      }
> > > 
> > >      av_frame_free(&ctx->frame);
> > > +    av_packet_free(&ctx->tail_pkt);
> > > 
> > >      av_freep(&ctx->codec_sequence_params);
> > >      av_freep(&ctx->codec_picture_params);
> > > +    av_freep(&ctx->header_data);
> > >      av_fifo_freep2(&ctx->encode_fifo);
> > > 
> > >      av_buffer_unref(&ctx->recon_frames_ref);
> > > diff --git a/libavcodec/vaapi_encode.h
> > > b/libavcodec/vaapi_encode.h
> > > index d5452a37b3..03df8d6d46 100644
> > > --- a/libavcodec/vaapi_encode.h
> > > +++ b/libavcodec/vaapi_encode.h
> > > @@ -133,6 +133,11 @@ typedef struct VAAPIEncodePicture {
> > > 
> > >      int          nb_slices;
> > >      VAAPIEncodeSlice *slices;
> > > +
> > > +    /** Tail data of current pic, used only for repeat header of
> > > AV1. */
> > > +    char tail_data[MAX_PARAM_BUFFER_SIZE];
> > > +    /** Byte length of tail_data. */
> > > +    size_t tail_size;
> > >  } VAAPIEncodePicture;
> > > 
> > >  typedef struct VAAPIEncodeProfile {
> > > @@ -367,6 +372,13 @@ typedef struct VAAPIEncodeContext {
> > >      AVFifo          *encode_fifo;
> > >      // Max number of frame buffered in encoder.
> > >      int             async_depth;
> > > +
> > > +    /** Head data for current output pkt, used only for AV1. */
> > > +    void  *header_data;
> > > +    size_t header_data_size;
> > > +
> > > +    /** Store av1 repeat frame header pkt. */
> > > +    AVPacket *tail_pkt;
> > >  } VAAPIEncodeContext;
> > > 
> > >  enum {
> > > diff --git a/libavcodec/vaapi_encode_av1.c
> > > b/libavcodec/vaapi_encode_av1.c
> > > new file mode 100644
> > > index 0000000000..778c5b35c7
> > > --- /dev/null
> > > +++ b/libavcodec/vaapi_encode_av1.c
> > > @@ -0,0 +1,1229 @@
> > > +/*
> > > + * Copyright (c) 2023 Intel Corporation
> > > + *
> > > + * This file is part of FFmpeg.
> > > + *
> > > + * FFmpeg is free software; you can redistribute it and/or
> > > + * modify it under the terms of the GNU Lesser General Public
> > > + * License as published by the Free Software Foundation; either
> > > + * version 2.1 of the License, or (at your option) any later
> > > version.
> > > + *
> > > + * FFmpeg is distributed in the hope that it will be useful,
> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty
> > > of
> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > GNU
> > > + * Lesser General Public License for more details.
> > > + *
> > > + * You should have received a copy of the GNU Lesser General
> > > Public
> > > + * License along with FFmpeg; if not, write to the Free Software
> > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > > 02110-1301 USA
> > > + */
> > > +
> > > +#include <va/va.h>
> > > +#include <va/va_enc_av1.h>
> > > +
> > > +#include "libavutil/pixdesc.h"
> > > +#include "libavutil/opt.h"
> > > +
> > > +#include "cbs_av1.h"
> > > +#include "put_bits.h"
> > > +#include "codec_internal.h"
> > > +#include "av1_profile_level.h"
> > > +#include "vaapi_encode.h"
> > > +
> > > +#define AV1_MAX_QUANT 255
> > > +
> > > +typedef struct VAAPIEncodeAV1Picture {
> > > +    int64_t last_idr_frame;
> > > +    int slot;
> > > +} VAAPIEncodeAV1Picture;
> > > +
> > > +typedef struct VAAPIEncodeAV1Context {
> > > +    VAAPIEncodeContext common;
> > > +    AV1RawOBU sh; /**< sequence header.*/
> > > +    AV1RawOBU fh; /**< frame header.*/
> > > +    CodedBitstreamContext *cbc;
> > > +    CodedBitstreamFragment current_obu;
> > > +    VAConfigAttribValEncAV1 attr;
> > > +    VAConfigAttribValEncAV1Ext1 attr_ext1;
> > > +    VAConfigAttribValEncAV1Ext2 attr_ext2;
> > > +
> > > +    char sh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded sequence
> > > header data. */
> > > +    size_t sh_data_len; /**< bit length of sh_data. */
> > > +    char fh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded frame header
> > > data. */
> > > +    size_t fh_data_len; /**< bit length of fh_data. */
> > > +
> > > +    uint8_t uniform_tile;
> > > +    uint8_t use_128x128_superblock;
> > > +    int sb_cols;
> > > +    int sb_rows;
> > > +    int tile_cols_log2;
> > > +    int tile_rows_log2;
> > > +    int max_tile_width_sb;
> > > +    int max_tile_height_sb;
> > > +    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
> > > +    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
> > > +
> > > +    int min_log2_tile_cols;
> > > +    int max_log2_tile_cols;
> > > +    int min_log2_tile_rows;
> > > +    int max_log2_tile_rows;
> > > +
> > > +    int q_idx_idr;
> > > +    int q_idx_p;
> > > +    int q_idx_b;
> > > +
> > > +    /** user options */
> > > +    int profile;
> > > +    int level;
> > > +    int tier;
> > > +    int tile_cols, tile_rows;
> > > +    int tile_groups;
> > > +} VAAPIEncodeAV1Context;
> > > +
> > > +static av_cold int vaapi_encode_av1_configure(AVCodecContext
> > > *avctx)
> > > +{
> > > +    VAAPIEncodeContext     *ctx = avctx->priv_data;
> > > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > > +    int ret;
> > > +
> > > +    ret = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, avctx);
> > > +    if (ret < 0)
> > > +        return ret;
> > > +
> > > +    if (ctx->rc_mode->quality) {
> > > +        priv->q_idx_p = av_clip(ctx->rc_quality, 0,
> > > AV1_MAX_QUANT);
> > > +        if (fabs(avctx->i_quant_factor) > 0.0)
> > > +            priv->q_idx_idr =
> > > +                av_clip((fabs(avctx->i_quant_factor) * priv-
> > > >q_idx_p  +
> > > +                         avctx->i_quant_offset) + 0.5,
> > > +                        0, AV1_MAX_QUANT);
> > > +        else
> > > +            priv->q_idx_idr = priv->q_idx_p;
> > > +
> > > +        if (fabs(avctx->b_quant_factor) > 0.0)
> > > +            priv->q_idx_b =
> > > +                av_clip((fabs(avctx->b_quant_factor) * priv-
> > > >q_idx_p  +
> > > +                         avctx->b_quant_offset) + 0.5,
> > > +                        0, AV1_MAX_QUANT);
> > > +        else
> > > +            priv->q_idx_b = priv->q_idx_p;
> > > +    } else {
> > > +        /** Arbitrary value */
> > > +        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_add_obu(AVCodecContext *avctx,
> > > +                                    CodedBitstreamFragment *au,
> > > +                                    uint8_t type,
> > > +                                    void *obu_unit)
> > > +{
> > > +    int ret;
> > > +
> > > +    ret = ff_cbs_insert_unit_content(au, -1,
> > > +                                     type, obu_unit, NULL);
> > > +    if (ret < 0) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: "
> > > +               "type = %d.\n", type);
> > > +        return ret;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_write_obu(AVCodecContext *avctx,
> > > +                                      char *data, size_t
> > > *data_len,
> > > +                                      CodedBitstreamFragment
> > > *bs)
> > > +{
> > > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > > +    int ret;
> > > +
> > > +    ret = ff_cbs_write_fragment_data(priv->cbc, bs);
> > > +    if (ret < 0) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Failed to write packed
> > > header.\n");
> > > +        return ret;
> > > +    }
> > > +
> > > +    if ((size_t)8 * MAX_PARAM_BUFFER_SIZE < 8 * bs->data_size -
> > > bs->data_bit_padding) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
> > > +               "%zu < %zu.\n", (size_t)8 *
> > > MAX_PARAM_BUFFER_SIZE,
> > > +               8 * bs->data_size - bs->data_bit_padding);
> > > +        return AVERROR(ENOSPC);
> > > +    }
> > > +
> > > +    memcpy(data, bs->data, bs->data_size);
> > > +    *data_len = 8 * bs->data_size - bs->data_bit_padding;
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int get_relative_dist(const AV1RawSequenceHeader *seq,
> > > +                             unsigned int a, unsigned int b)
> > > +{
> > > +    unsigned int diff, m;
> > > +    if (!seq->enable_order_hint)
> > > +        return 0;
> > > +    diff = a - b;
> > > +    m = 1 << seq->order_hint_bits_minus_1;
> > > +    diff = (diff & (m - 1)) - (diff & m);
> > > +    return diff;
> > > +}
> > > +
> > > +static int write_ns(PutBitContext *pbc, uint32_t n, uint32_t
> > > value)
> > > +{
> > > +    uint32_t w, m, v, extra_bit;
> > > +
> > > +    w = av_log2(n) + 1;
> > > +    m = (1 << w) - n;
> > > +
> > > +    if (value < m) {
> > > +        v = value;
> > > +        put_bits(pbc, w - 1, v);
> > > +    } else {
> > > +        v = m + ((value - m) >> 1);
> > > +        extra_bit = (value - m) & 1;
> > > +        put_bits(pbc, w - 1, v);
> > > +        put_bits(pbc, 1, extra_bit);
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +/**
> > > + * This API provide the minmum implemention according current
> > > enabled features
> > > + * in frame header. If more features will enable in furture,
> > > please make sure
> > > + * the relative flags of features should be packed correctly
> > > into frame header
> > > + * obu in this API.
> > > + */
> > > +static int vaapi_encode_av1_write_frame_header(AVCodecContext
> > > *avctx,
> > > +                                               VAAPIEncodePictur
> > > e *pic,
> > > +                                               char *data,
> > > size_t *data_len)
> > > +{
> > > +    VAAPIEncodeContext              *ctx = avctx->priv_data;
> > > +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> > > +    AV1RawOBU                    *fh_obu = &priv->fh;
> > > +    AV1RawOBU                    *sh_obu = &priv->sh;
> > > +    AV1RawFrameHeader                *fh = &fh_obu-
> > > >obu.frame.header;
> > > +    AV1RawSequenceHeader             *sh = &sh_obu-
> > > >obu.sequence_header;
> > > +    VAEncPictureParameterBufferAV1 *vpic = pic-
> > > >codec_picture_params;
> > > +    PutBitContext pbc, pbc_tmp;
> > > +    uint8_t byte;
> > > +    int qindex, coded_lossless;
> > > +    int id_len, frame_is_intra, skip_mode_allowed;
> > > +    int start, payload_bits, obu_size, obu_size_len;
> > > +    int qindex_offset, loopfilter_offset;
> > > +    int cdef_start_offset, cdef_end_offset;
> > > +    int i;
> > > +
> > > +    init_put_bits(&pbc, data, MAX_PARAM_BUFFER_SIZE);
> > > +
> > > +    /** obu header */
> > > +    put_bits(&pbc, 1, fh_obu->header.obu_forbidden_bit);
> > > +    put_bits(&pbc, 4, fh_obu->header.obu_type);
> > > +    put_bits(&pbc, 1, fh_obu->header.obu_extension_flag);
> > > +    put_bits(&pbc, 1, fh_obu->header.obu_has_size_field);
> > > +    put_bits(&pbc, 1, fh_obu->header.obu_reserved_1bit);
> > > +
> > > +    /** record pbc status to re-write obu size later. */
> > > +    if (fh_obu->header.obu_has_size_field) {
> > > +        pbc_tmp = pbc;
> > > +        put_bits32(&pbc, 0);
> > > +        put_bits32(&pbc, 0);
> > > +    }
> > > +
> > > +    start = put_bits_count(&pbc);
> > > +
> > > +    /** uncompressed_header() */
> > > +    if (sh->frame_id_numbers_present_flag)
> > > +        id_len = sh->additional_frame_id_length_minus_1 +
> > > +                 sh->delta_frame_id_length_minus_2 + 3;
> > > +
> > > +    frame_is_intra = (fh->frame_type == AV1_FRAME_KEY ||
> > > +                      fh->frame_type == AV1_FRAME_INTRA_ONLY);
> > > +    if (!sh->reduced_still_picture_header) {
> > > +        put_bits(&pbc, 1, fh->show_existing_frame);
> > > +        if (fh->show_existing_frame) {
> > > +            put_bits(&pbc, 3, fh->frame_to_show_map_idx);
> > > +            goto trailing_bits;
> > > +        }
> > > +        if (sh->frame_id_numbers_present_flag)
> > > +            put_bits(&pbc, id_len, fh->display_frame_id);
> > > +
> > > +        put_bits(&pbc, 2, fh->frame_type);
> > > +        put_bits(&pbc, 1, fh->show_frame);
> > > +        if (!fh->show_frame)
> > > +            put_bits(&pbc, 1, fh->showable_frame);
> > > +
> > > +        if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> > > +            (fh->frame_type == AV1_FRAME_KEY && fh-
> > > >show_frame)))
> > > +            put_bits(&pbc, 1, fh->error_resilient_mode);
> > > +    }
> > > +
> > > +    put_bits(&pbc, 1, fh->disable_cdf_update);
> > > +    if (sh->seq_force_screen_content_tools ==
> > > AV1_SELECT_SCREEN_CONTENT_TOOLS)
> > > +        put_bits(&pbc, 1, fh->allow_screen_content_tools);
> > > +
> > > +    if (fh->allow_screen_content_tools && sh-
> > > >seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
> > > +        put_bits(&pbc, 1, fh->force_integer_mv);
> > > +
> > > +    if (sh->frame_id_numbers_present_flag)
> > > +        put_bits(&pbc, id_len, fh->current_frame_id);
> > > +
> > > +    if (fh->frame_type != AV1_FRAME_SWITCH && !sh-
> > > >reduced_still_picture_header)
> > > +        put_bits(&pbc, 1, fh->frame_size_override_flag);
> > > +
> > > +    if (sh->enable_order_hint)
> > > +        put_bits(&pbc, sh->order_hint_bits_minus_1 + 1, fh-
> > > >order_hint);
> > > +
> > > +    if (!(frame_is_intra || fh->error_resilient_mode))
> > > +        put_bits(&pbc, 3, fh->primary_ref_frame);
> > > +
> > > +    if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> > > +        fh->frame_type == AV1_FRAME_KEY && fh->show_frame))
> > > +        put_bits(&pbc, 8, fh->refresh_frame_flags);
> > > +
> > > +    if (frame_is_intra) {
> > > +        /** render_size() */
> > > +         put_bits(&pbc, 1, fh->render_and_frame_size_different);
> > > +    } else {
> > > +        if (!frame_is_intra && sh->enable_order_hint)
> > > +            put_bits(&pbc, 1, fh->frame_refs_short_signaling);
> > > +
> > > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > > +            if (!fh->frame_refs_short_signaling)
> > > +                put_bits(&pbc, 3, fh->ref_frame_idx[i]);
> > > +        }
> > > +
> > > +        if (!(fh->frame_size_override_flag && !fh-
> > > >error_resilient_mode))
> > > +            put_bits(&pbc, 1, fh-
> > > >render_and_frame_size_different);
> > > +
> > > +        if (!fh->force_integer_mv)
> > > +            put_bits(&pbc, 1, fh->allow_high_precision_mv);
> > > +
> > > +        /** read_interpolation_filter() */
> > > +        put_bits(&pbc, 1, fh->is_filter_switchable);
> > > +        if (!fh->is_filter_switchable)
> > > +            put_bits(&pbc, 2, fh->interpolation_filter);
> > > +
> > > +        put_bits(&pbc, 1, fh->is_motion_mode_switchable);
> > > +    }
> > > +
> > > +    if (!(sh->reduced_still_picture_header || fh-
> > > >disable_cdf_update))
> > > +        put_bits(&pbc, 1, fh->disable_frame_end_update_cdf);
> > > +
> > > +    /** tile_info() */
> > > +    put_bits(&pbc, 1, fh->uniform_tile_spacing_flag);
> > > +    if (fh->uniform_tile_spacing_flag) {
> > > +        for (i = 0; i < priv->tile_cols_log2 - priv-
> > > >min_log2_tile_cols; i++) {
> > > +            put_bits(&pbc, 1, 1);
> > > +        }
> > > +        if (priv->tile_cols_log2 < priv->max_log2_tile_cols)
> > > +            put_bits(&pbc, 1, 0);
> > > +
> > > +        for (i = 0; i < priv->tile_rows_log2 - priv-
> > > >min_log2_tile_rows; i++) {
> > > +            put_bits(&pbc, 1, 1);
> > > +        }
> > > +        if (priv->tile_rows_log2 < priv->max_log2_tile_rows)
> > > +            put_bits(&pbc, 1, 0);
> > > +    } else {
> > > +        int start_sb = 0;
> > > +        int max_width, max_height;
> > > +        for (i = 0; start_sb < priv->sb_cols; i++) {
> > > +            max_width = FFMIN(priv->sb_cols - start_sb, priv-
> > > >max_tile_width_sb);
> > > +            write_ns(&pbc, max_width, fh-
> > > >width_in_sbs_minus_1[i]);
> > > +            start_sb += fh->width_in_sbs_minus_1[i] + 1;
> > > +        }
> > > +
> > > +        start_sb = 0;
> > > +        for (i = 0; start_sb < priv->sb_rows; i++) {
> > > +            max_height = FFMIN(priv->sb_rows - start_sb, priv-
> > > >max_tile_height_sb);
> > > +            write_ns(&pbc, max_height, fh-
> > > >height_in_sbs_minus_1[i]);
> > > +            start_sb += fh->height_in_sbs_minus_1[i] + 1;
> > > +        }
> > > +    }
> > > +
> > > +    if (priv->tile_cols_log2 || priv->tile_rows_log2) {
> > > +        put_bits(&pbc, priv->tile_cols_log2 + priv-
> > > >tile_rows_log2, fh->context_update_tile_id);
> > > +        put_bits(&pbc, 2, fh->tile_size_bytes_minus1);
> > > +    }
> > > +
> > > +    qindex_offset = put_bits_count(&pbc);
> > > +    /** quantization_params() */
> > > +    put_bits(&pbc, 8, fh->base_q_idx);
> > > +    put_bits(&pbc, 1, fh->delta_q_y_dc);
> > > +    put_bits(&pbc, 1, fh->delta_q_u_dc);
> > > +    put_bits(&pbc, 1, fh->delta_q_u_ac);
> > > +    put_bits(&pbc, 1, fh->using_qmatrix);
> > > +
> > > +    /** segmentation_params() */
> > > +    put_bits(&pbc, 1, fh->segmentation_enabled);
> > > +
> > > +    /** delta_q_params() */
> > > +    if (fh->base_q_idx)
> > > +        put_bits(&pbc, 1, fh->delta_q_present);
> > > +    if (fh->delta_q_present)
> > > +        put_bits(&pbc, 2, fh->delta_q_res);
> > > +
> > > +    /** delta_lf_params() */
> > > +    if (fh->delta_q_present) {
> > > +        if (!fh->allow_intrabc)
> > > +            put_bits(&pbc, 1, fh->delta_lf_present);
> > > +        if (fh->delta_lf_present) {
> > > +            put_bits(&pbc, 2, fh->delta_lf_res);
> > > +            put_bits(&pbc, 1, fh->delta_lf_multi);
> > > +        }
> > > +    }
> > > +
> > > +    /** codelossless */
> > > +    coded_lossless = 1;
> > > +    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
> > > +        if (fh->segmentation_enabled && fh-
> > > >feature_enabled[i][AV1_SEG_LVL_ALT_Q])
> > > +            qindex = fh->base_q_idx + fh-
> > > >feature_value[i][AV1_SEG_LVL_ALT_Q];
> > > +        else
> > > +            qindex = fh->base_q_idx;
> > > +        qindex = av_clip_uintp2(qindex, 8);
> > > +
> > > +        if (qindex || fh->delta_q_y_dc || fh->delta_q_u_ac ||
> > > fh->delta_q_u_dc ||
> > > +            fh->delta_q_v_ac || fh->delta_q_v_dc)
> > > +            coded_lossless = 0;
> > > +    }
> > > +
> > > +    loopfilter_offset = put_bits_count(&pbc);
> > > +    /** loop_filter_params() */
> > > +    if (!(coded_lossless || fh->allow_intrabc)) {
> > > +        put_bits(&pbc, 6, fh->loop_filter_level[0]);
> > > +        put_bits(&pbc, 6, fh->loop_filter_level[1]);
> > > +        if (fh->loop_filter_level[0] || fh-
> > > >loop_filter_level[1]) {
> > > +            put_bits(&pbc, 6, fh->loop_filter_level[2]);
> > > +            put_bits(&pbc, 6, fh->loop_filter_level[3]);
> > > +        }
> > > +        put_bits(&pbc, 3, fh->loop_filter_sharpness);
> > > +        put_bits(&pbc, 1, fh->loop_filter_delta_enabled);
> > > +    }
> > > +
> > > +    cdef_start_offset = put_bits_count(&pbc);
> > > +    /** cdef_params() */
> > > +    if (!(coded_lossless || fh->allow_intrabc || !sh-
> > > >enable_cdef)) {
> > > +        put_bits(&pbc, 2, fh->cdef_damping_minus_3);
> > > +        put_bits(&pbc, 2, fh->cdef_bits);
> > > +        for (i = 0; i < (1 << fh->cdef_bits); i++) {
> > > +            put_bits(&pbc, 4, fh->cdef_y_pri_strength[i]);
> > > +            put_bits(&pbc, 2, fh->cdef_y_sec_strength[i]);
> > > +            put_bits(&pbc, 4, fh->cdef_uv_pri_strength[i]);
> > > +            put_bits(&pbc, 2, fh->cdef_uv_sec_strength[i]);
> > > +        }
> > > +    }
> > > +    cdef_end_offset = put_bits_count(&pbc);
> > > +
> > > +    /** read_tx_mode() */
> > > +    if (fh->tx_mode == AV1_TX_MODE_SELECT)
> > > +        put_bits(&pbc, 1, 1);
> > > +    else
> > > +        put_bits(&pbc, 1, 0);
> > > +
> > > +    /** frame_reference_mode() */
> > > +    if (!frame_is_intra)
> > > +        put_bits(&pbc, 1, fh->reference_select);
> > > +
> > > +    /** skip_mode_params() */
> > > +    if (frame_is_intra || !fh->reference_select || !sh-
> > > >enable_order_hint)
> > > +        skip_mode_allowed = 0;
> > > +    else {
> > > +        int forward_idx, backward_idx;
> > > +        int ref_hint, forward_hint, backward_hint;
> > > +
> > > +        forward_idx = -1;
> > > +        backward_idx = -1;
> > > +
> > > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > > +            ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> > > +            if (get_relative_dist(sh, ref_hint, fh->order_hint)
> > > < 0) {
> > > +                if (forward_idx < 0 || get_relative_dist(sh,
> > > ref_hint, forward_idx) > 0) {
> > > +                    forward_idx = i;
> > > +                    forward_hint = ref_hint;
> > > +                }
> > > +            } else if (get_relative_dist(sh, ref_hint, fh-
> > > >order_hint) > 0) {
> > > +                if (backward_idx < 0 || get_relative_dist(sh,
> > > ref_hint, backward_hint) < 0) {
> > > +                    backward_idx = i;
> > > +                    backward_hint = ref_hint;
> > > +                }
> > > +            }
> > > +        }
> > > +        if (forward_idx < 0)
> > > +            skip_mode_allowed = 0;
> > > +        else if (backward_idx >= 0)
> > > +            skip_mode_allowed = 1;
> > > +        else {
> > > +            int second_forward_idx, second_forward_hint;
> > > +            second_forward_idx = -1;
> > > +            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > > +                ref_hint = fh->ref_order_hint[fh-
> > > >ref_frame_idx[i]];
> > > +                if (get_relative_dist(sh, ref_hint,
> > > forward_hint) < 0) {
> > > +                    if (second_forward_idx < 0 ||
> > > get_relative_dist(sh, ref_hint, second_forward_hint) > 0){
> > > +                        second_forward_idx = i;
> > > +                        second_forward_hint = ref_hint;
> > > +                    }
> > > +                }
> > > +            }
> > > +            if (second_forward_idx < 0)
> > > +                skip_mode_allowed = 0;
> > > +            else
> > > +                skip_mode_allowed = 1;
> > > +        }
> > > +    }
> > > +
> > > +    if (skip_mode_allowed)
> > > +        put_bits(&pbc, 1, fh->skip_mode_present);
> > > +
> > > +    put_bits(&pbc, 1, fh->reduced_tx_set);
> > > +
> > > +    /** global_motion_params() */
> > > +    if (!frame_is_intra) {
> > > +        for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF;
> > > i++) {
> > > +            put_bits(&pbc, 1, fh->is_global[i]);
> > > +            if (fh->is_global[i]) {
> > > +                put_bits(&pbc, 1, fh->is_rot_zoom[i]);
> > > +                if (!fh->is_rot_zoom[i])
> > > +                    put_bits(&pbc, 1, fh->is_translation[i]);
> > > +            }
> > > +        }
> > > +    }
> > > +
> > > +trailing_bits:
> > > +    payload_bits = put_bits_count(&pbc) - start;
> > > +
> > > +    /** trailing_bits() */
> > > +    put_bits(&pbc, 1, 1);
> > > +    obu_size = (put_bits_count(&pbc) - start + 7) / 8;
> > > +    for (i = 0; i < obu_size * 8 - payload_bits - 1; i++)
> > > +        put_bits(&pbc, 1, 0);
> > > +
> > > +    flush_put_bits(&pbc);
> > > +    *data_len = put_bits_count(&pbc);
> > > +
> > > +    /** update obu size in bitstream */
> > > +    if (fh_obu->header.obu_has_size_field) {
> > > +        obu_size_len = priv-
> > > >attr_ext2.bits.obu_size_bytes_minus1 + 1;
> > > +        for (i = 0; i < obu_size_len; i++) {
> > > +            byte = obu_size >> (7 * i) & 0x7f;
> > > +            if (i < obu_size_len - 1)
> > > +                byte |= 0x80;
> > > +            put_bits(&pbc_tmp, 8, byte);
> > > +        }
> > > +        flush_put_bits(&pbc_tmp);
> > > +        memmove(pbc_tmp.buf_ptr, pbc_tmp.buf_ptr + (8 -
> > > obu_size_len), obu_size);
> > > +        *data_len -= (8 - obu_size_len) * 8;
> > > +    }
> > > +
> > > +    if (fh->show_existing_frame)
> > > +        return 0;
> > > +
> > > +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> > > +        vpic->min_base_qindex = av_clip(avctx->qmin, 1,
> > > AV1_MAX_QUANT);
> > > +        vpic->max_base_qindex = av_clip(avctx->qmax, 1,
> > > AV1_MAX_QUANT);
> > > +
> > > +        vpic->bit_offset_qindex            = qindex_offset - (8
> > > - obu_size_len) * 8;
> > > +        vpic->bit_offset_loopfilter_params = loopfilter_offset -
> > > (8 - obu_size_len) * 8;
> > > +        vpic->bit_offset_cdef_params       = cdef_start_offset -
> > > (8 - obu_size_len) * 8;
> > > +        vpic->size_in_bits_cdef_params     = cdef_end_offset -
> > > cdef_start_offset;
> > > +        vpic->size_in_bits_frame_hdr_obu   = *data_len;
> > > +
> > > +        vpic->byte_offset_frame_hdr_obu_size = (((pic->type ==
> > > PICTURE_TYPE_IDR) ?
> > > +                                               priv->sh_data_len 
> > > / 8 : 0) +
> > > +                                               (fh_obu-
> > > >header.obu_extension_flag ?
> > > +                                               2 : 1));
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int tile_log2(int blkSize, int target) {
> > > +    int k;
> > > +    for (k = 0; (blkSize << k) < target; k++);
> > > +    return k;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_set_tile(AVCodecContext *avctx)
> > > +{
> > > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > > +    int mi_cols, mi_rows, sb_shift, sb_size;
> > > +    int max_tile_area_sb, max_tile_area_sb_varied;
> > > +    int tile_width_sb, tile_height_sb, widest_tile_sb;
> > > +    int min_log2_tiles;
> > > +    int tile_rows_tmp, i;
> > > +
> > > +    if (priv->tile_cols > AV1_MAX_TILE_COLS ||
> > > +        priv->tile_rows > AV1_MAX_TILE_ROWS) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d,
> > > should less than %dx%d.\n",
> > > +               priv->tile_cols, priv->tile_rows,
> > > AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
> > > +        return AVERROR(EINVAL);
> > > +    }
> > > +
> > > +    mi_cols = 2 * ((avctx->width + 7) >> 3);
> > > +    mi_rows = 2 * ((avctx->height + 7) >> 3);
> > > +    priv->sb_cols = priv->use_128x128_superblock ?
> > > +                    ((mi_cols + 31) >> 5) : ((mi_cols + 15) >>
> > > 4);
> > > +    priv->sb_rows = priv->use_128x128_superblock ?
> > > +                    ((mi_rows + 31) >> 5) : ((mi_rows + 15) >>
> > > 4);
> > > +    sb_shift = priv->use_128x128_superblock ? 5 : 4;
> > > +    sb_size  = sb_shift + 2;
> > > +    priv->max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
> > > +    max_tile_area_sb = AV1_MAX_TILE_AREA  >> (2 * sb_size);
> > > +
> > > +    priv->min_log2_tile_cols = tile_log2(priv-
> > > >max_tile_width_sb, priv->sb_cols);
> > > +    priv->max_log2_tile_cols = tile_log2(1, FFMIN(priv->sb_cols, 
> > > AV1_MAX_TILE_COLS));
> > > +    priv->max_log2_tile_rows = tile_log2(1, FFMIN(priv->sb_rows, 
> > > AV1_MAX_TILE_ROWS));
> > > +    min_log2_tiles = FFMAX(priv->min_log2_tile_cols,
> > > +                           tile_log2(max_tile_area_sb, priv-
> > > >sb_rows * priv->sb_cols));
> > > +
> > > +    if (priv->tile_cols != av_clip(priv->tile_cols, (priv-
> > > >sb_cols + priv->max_tile_width_sb - 1) / priv-
> > > >max_tile_width_sb, priv->sb_cols)) {
> > > +        priv->tile_cols = av_clip(priv->tile_cols, (priv-
> > > >sb_cols + priv->max_tile_width_sb - 1) / priv-
> > > >max_tile_width_sb, priv->sb_cols);
> > > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile cols,
> > > correct to %d.\n", priv->tile_cols);
> > > +    }
> > > +
> > > +    priv->tile_cols_log2 = tile_log2(1, priv->tile_cols);
> > > +    tile_width_sb = (priv->sb_cols + (1 << priv->tile_cols_log2) 
> > > - 1) >>
> > > +                    priv->tile_cols_log2;
> > > +
> > > +    if (priv->tile_rows > priv->sb_rows) {
> > > +        priv->tile_rows = priv->sb_rows;
> > > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows,
> > > correct to %d.\n", priv->tile_rows);
> > > +    }
> > > +
> > > +    for (tile_rows_tmp = priv->tile_rows; tile_rows_tmp <= priv-
> > > >sb_rows && tile_rows_tmp <= AV1_MAX_TILE_ROWS; tile_rows_tmp++)
> > > {
> > > +        /** try uniformed tile. */
> > > +        priv->tile_rows_log2 = tile_log2(1, tile_rows_tmp);
> > > +        if ((priv->sb_cols + tile_width_sb - 1) / tile_width_sb
> > > == priv->tile_cols) {
> > > +            for (i = 0; i < priv->tile_cols - 1; i++)
> > > +                priv->width_in_sbs_minus_1[i] = tile_width_sb -
> > > 1;
> > > +            priv->width_in_sbs_minus_1[i] = priv->sb_cols -
> > > (priv->tile_cols - 1) * tile_width_sb - 1;
> > > +
> > > +            tile_height_sb = (priv->sb_rows + (1 << priv-
> > > >tile_rows_log2) - 1) >>
> > > +                             priv->tile_rows_log2;
> > > +
> > > +            if ((priv->sb_rows + tile_height_sb - 1) /
> > > tile_height_sb == tile_rows_tmp &&
> > > +                tile_height_sb <= max_tile_area_sb /
> > > tile_width_sb) {
> > > +                for (i = 0; i < tile_rows_tmp - 1; i++)
> > > +                    priv->height_in_sbs_minus_1[i] =
> > > tile_height_sb - 1;
> > > +                priv->height_in_sbs_minus_1[i] = priv->sb_rows -
> > > (tile_rows_tmp - 1) * tile_height_sb - 1;
> > > +
> > > +                priv->uniform_tile = 1;
> > > +                priv->min_log2_tile_rows = FFMAX(min_log2_tiles
> > > - priv->tile_cols_log2, 0);
> > > +
> > > +                break;
> > > +            }
> > > +        }
> > > +
> > > +        /** try non-uniformed tile. */
> > > +        widest_tile_sb = 0;
> > > +        for (i = 0; i < priv->tile_cols; i++) {
> > > +            priv->width_in_sbs_minus_1[i] = (i + 1) * priv-
> > > >sb_cols / priv->tile_cols - i * priv->sb_cols / priv->tile_cols
> > > - 1;
> > > +            widest_tile_sb = FFMAX(widest_tile_sb, priv-
> > > >width_in_sbs_minus_1[i] + 1);
> > > +        }
> > > +
> > > +        if (min_log2_tiles)
> > > +            max_tile_area_sb_varied = (priv->sb_rows * priv-
> > > >sb_cols) >> (min_log2_tiles + 1);
> > > +        else
> > > +            max_tile_area_sb_varied = priv->sb_rows * priv-
> > > >sb_cols;
> > > +        priv->max_tile_height_sb = FFMAX(1,
> > > max_tile_area_sb_varied / widest_tile_sb);
> > > +
> > > +        if (tile_rows_tmp == av_clip(tile_rows_tmp, (priv-
> > > >sb_rows + priv->max_tile_height_sb - 1) / priv-
> > > >max_tile_height_sb, priv->sb_rows)) {
> > > +            for (i = 0; i < tile_rows_tmp; i++)
> > > +                priv->height_in_sbs_minus_1[i] = (i + 1) * priv-
> > > >sb_rows / tile_rows_tmp - i * priv->sb_rows / tile_rows_tmp - 1;
> > > +
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    if (priv->tile_rows != tile_rows_tmp) {
> > > +        priv->tile_rows = tile_rows_tmp;
> > > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows,
> > > correct to %d.\n", priv->tile_rows);
> > > +    }
> > > +
> > > +    /** check if tile cols/rows is supported by driver. */
> > > +    if (priv->attr_ext2.bits.max_tile_num_minus1) {
> > > +        if ((priv->tile_cols * priv->tile_rows - 1) > priv-
> > > >attr_ext2.bits.max_tile_num_minus1) {
> > > +            av_log(avctx, AV_LOG_ERROR, "Unsupported tile num %d
> > > * %d = %d by driver, "
> > > +                   "should be less than %d.\n", priv->tile_cols, 
> > > priv->tile_rows,
> > > +                   priv->tile_cols * priv->tile_rows,
> > > +                   priv->attr_ext2.bits.max_tile_num_minus1 +
> > > 1);
> > > +            return AVERROR(EINVAL);
> > > +        }
> > > +    }
> > > +    av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to
> > > %d/%d.\n",
> > > +           priv->tile_cols, priv->tile_rows);
> > > +
> > > +    /** check if tile group numbers is valid. */
> > > +    if (priv->tile_groups > priv->tile_cols * priv->tile_rows) {
> > > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile groups
> > > number %d, "
> > > +        "correct to %d.\n", priv->tile_groups, priv->tile_cols *
> > > priv->tile_rows);
> > > +        priv->tile_groups = priv->tile_cols * priv->tile_rows;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_write_sequence_header(AVCodecContext
> > > *avctx,
> > > +                                                  char *data,
> > > size_t *data_len)
> > > +{
> > > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > > +
> > > +    memcpy(data, &priv->sh_data, MAX_PARAM_BUFFER_SIZE *
> > > sizeof(char));
> > > +    *data_len = priv->sh_data_len;
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_init_sequence_params(AVCodecContext
> > > *avctx)
> > > +{
> > > +    VAAPIEncodeContext               *ctx = avctx->priv_data;
> > > +    VAAPIEncodeAV1Context           *priv = avctx->priv_data;
> > > +    AV1RawOBU                     *sh_obu = &priv->sh;
> > > +    AV1RawSequenceHeader              *sh = &sh_obu-
> > > >obu.sequence_header;
> > > +    VAEncSequenceParameterBufferAV1 *vseq = ctx-
> > > >codec_sequence_params;
> > > +    CodedBitstreamFragment           *obu = &priv->current_obu;
> > > +    const AVPixFmtDescriptor *desc;
> > > +    int ret;
> > > +
> > > +    memset(sh_obu, 0, sizeof(*sh_obu));
> > > +    sh_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER;
> > > +
> > > +    desc = av_pix_fmt_desc_get(priv->common.input_frames-
> > > >sw_format);
> > > +    av_assert0(desc);
> > > +
> > > +    sh->seq_profile  = avctx->profile;
> > > +    if (!sh->seq_force_screen_content_tools)
> > > +        sh->seq_force_integer_mv = AV1_SELECT_INTEGER_MV;
> > > +    sh->frame_width_bits_minus_1  = av_log2(avctx->width);
> > > +    sh->frame_height_bits_minus_1 = av_log2(avctx->height);
> > > +    sh->max_frame_width_minus_1   = avctx->width - 1;
> > > +    sh->max_frame_height_minus_1  = avctx->height - 1;
> > > +    sh->seq_tier[0]               = priv->tier;
> > > +    /** enable order hint and reserve maximum 8 bits for it by
> > > default. */
> > > +    sh->enable_order_hint         = 1;
> > > +    sh->order_hint_bits_minus_1   = 7;
> > > +
> > > +    sh->color_config = (AV1RawColorConfig) {
> > > +        .high_bitdepth                  = desc->comp[0].depth ==
> > > 8 ? 0 : 1,
> > > +        .color_primaries                = avctx-
> > > >color_primaries,
> > > +        .transfer_characteristics       = avctx->color_trc,
> > > +        .matrix_coefficients            = avctx->colorspace,
> > > +        .color_description_present_flag = (avctx-
> > > >color_primaries != AVCOL_PRI_UNSPECIFIED ||
> > > +                                           avctx-
> > > >color_trc       != AVCOL_TRC_UNSPECIFIED ||
> > > +                                           avctx-
> > > >colorspace      != AVCOL_SPC_UNSPECIFIED),
> > > +        .color_range                    = avctx->color_range ==
> > > AVCOL_RANGE_JPEG,
> > > +        .subsampling_x                  = desc->log2_chroma_w,
> > > +        .subsampling_y                  = desc->log2_chroma_h,
> > > +    };
> > > +
> > > +    if (avctx->level != FF_LEVEL_UNKNOWN) {
> > > +        sh->seq_level_idx[0] = avctx->level;
> > > +    } else {
> > > +        const AV1LevelDescriptor *level;
> > > +        float framerate;
> > > +
> > > +        if (avctx->framerate.num > 0 && avctx->framerate.den >
> > > 0)
> > > +            framerate = avctx->framerate.num / avctx-
> > > >framerate.den;
> > > +        else
> > > +            framerate = 0;
> > > +
> > > +        level = ff_av1_guess_level(avctx->bit_rate, priv->tier,
> > > +                                   ctx->surface_width, ctx-
> > > >surface_height,
> > > +                                   priv->tile_rows * priv-
> > > >tile_cols,
> > > +                                   priv->tile_cols, framerate);
> > > +        if (level) {
> > > +            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n",
> > > level->name);
> > > +            sh->seq_level_idx[0] = level->level_idx;
> > > +        } else {
> > > +            av_log(avctx, AV_LOG_VERBOSE, "Stream will not
> > > conform to "
> > > +                   "any normal level, using level 6.3 by
> > > default.\n");
> > > +            sh->seq_level_idx[0] = 19;
> > > +            sh->seq_tier[0] = 1;
> > > +        }
> > > +    }
> > > +    vseq->seq_profile             = sh->seq_profile;
> > > +    vseq->seq_level_idx           = sh->seq_level_idx[0];
> > > +    vseq->seq_tier                = sh->seq_tier[0];
> > > +    vseq->order_hint_bits_minus_1 = sh->order_hint_bits_minus_1;
> > > +    vseq->intra_period            = ctx->gop_size;
> > > +    vseq->ip_period               = ctx->b_per_p + 1;
> > > +
> > > +    vseq->seq_fields.bits.enable_order_hint = sh-
> > > >enable_order_hint;
> > > +
> > > +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> > > +        vseq->bits_per_second = ctx->va_bit_rate;
> > > +        vseq->seq_fields.bits.enable_cdef = sh->enable_cdef = 1;
> > > +    }
> > > +
> > > +    ret = vaapi_encode_av1_add_obu(avctx, obu,
> > > AV1_OBU_SEQUENCE_HEADER, &priv->sh);
> > > +    if (ret < 0)
> > > +        goto end;
> > > +
> > > +    ret = vaapi_encode_av1_write_obu(avctx, priv->sh_data,
> > > &priv->sh_data_len, obu);
> > > +    if (ret < 0)
> > > +        goto end;
> > > +
> > > +end:
> > > +    ff_cbs_fragment_reset(obu);
> > > +    return ret;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_init_picture_params(AVCodecContext
> > > *avctx,
> > > +                                                VAAPIEncodePictu
> > > re *pic)
> > > +{
> > > +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> > > +    VAAPIEncodeAV1Picture          *hpic = pic->priv_data;
> > > +    AV1RawOBU                    *fh_obu = &priv->fh;
> > > +    AV1RawFrameHeader                *fh = &fh_obu-
> > > >obu.frame.header;
> > > +    VAEncPictureParameterBufferAV1 *vpic = pic-
> > > >codec_picture_params;
> > > +    CodedBitstreamFragment          *obu = &priv->current_obu;
> > > +    VAAPIEncodePicture    *ref;
> > > +    VAAPIEncodeAV1Picture *href;
> > > +    int slot, i;
> > > +    int ret;
> > > +    static const int8_t
> > > default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
> > > +        { 1, 0, 0, 0, -1, 0, -1, -1 };
> > > +
> > > +    memset(fh_obu, 0, sizeof(*fh_obu));
> > > +    pic->nb_slices = priv->tile_groups;
> > > +    fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> > > +    fh_obu->header.obu_has_size_field = 1;
> > > +
> > > +    switch (pic->type) {
> > > +    case PICTURE_TYPE_IDR:
> > > +        av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]);
> > > +        fh->frame_type = AV1_FRAME_KEY;
> > > +        fh->refresh_frame_flags = 0xFF;
> > > +        fh->base_q_idx = priv->q_idx_idr;
> > > +        hpic->slot = 0;
> > > +        hpic->last_idr_frame = pic->display_order;
> > > +        break;
> > > +    case PICTURE_TYPE_P:
> > > +        av_assert0(pic->nb_refs[0]);
> > > +        fh->frame_type = AV1_FRAME_INTER;
> > > +        fh->base_q_idx = priv->q_idx_p;
> > > +        ref = pic->refs[0][pic->nb_refs[0] - 1];
> > > +        href = ref->priv_data;
> > > +        hpic->slot = !href->slot;
> > > +        hpic->last_idr_frame = href->last_idr_frame;
> > > +        fh->refresh_frame_flags = 1 << hpic->slot;
> > > +
> > > +        /** set the nearest frame in L0 as all reference frame.
> > > */
> > > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > > +            fh->ref_frame_idx[i] = href->slot;
> > > +        }
> > > +        fh->primary_ref_frame = href->slot;
> > > +        fh->ref_order_hint[href->slot] = ref->display_order -
> > > href->last_idr_frame;
> > > +        vpic->ref_frame_ctrl_l0.fields.search_idx0 =
> > > AV1_REF_FRAME_LAST;
> > > +
> > > +        /** set the 2nd nearest frame in L0 as Golden frame. */
> > > +        if (pic->nb_refs[0] > 1) {
> > > +            ref = pic->refs[0][pic->nb_refs[0] - 2];
> > > +            href = ref->priv_data;
> > > +            fh->ref_frame_idx[3] = href->slot;
> > > +            fh->ref_order_hint[href->slot] = ref->display_order
> > > - href->last_idr_frame;
> > > +            vpic->ref_frame_ctrl_l0.fields.search_idx1 =
> > > AV1_REF_FRAME_GOLDEN;
> > > +        }
> > > +        break;
> > > +    case PICTURE_TYPE_B:
> > > +        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
> > > +        fh->frame_type = AV1_FRAME_INTER;
> > > +        fh->base_q_idx = priv->q_idx_b;
> > > +        fh->refresh_frame_flags = 0x0;
> > > +        fh->reference_select = 1;
> > > +
> > > +        /** B frame will not be referenced, disable its recon
> > > frame. */
> > > +        vpic->picture_flags.bits.disable_frame_recon = 1;
> > > +
> > > +        /** Use LAST_FRAME and BWDREF_FRAME for reference. */
> > > +        vpic->ref_frame_ctrl_l0.fields.search_idx0 =
> > > AV1_REF_FRAME_LAST;
> > > +        vpic->ref_frame_ctrl_l1.fields.search_idx0 =
> > > AV1_REF_FRAME_BWDREF;
> > > +
> > > +        ref                            = pic->refs[0][pic-
> > > >nb_refs[0] - 1];
> > > +        href                           = ref->priv_data;
> > > +        hpic->last_idr_frame           = href->last_idr_frame;
> > > +        fh->primary_ref_frame          = href->slot;
> > > +        fh->ref_order_hint[href->slot] = ref->display_order -
> > > href->last_idr_frame;
> > > +        for (i = 0; i < AV1_REF_FRAME_GOLDEN; i++) {
> > > +            fh->ref_frame_idx[i] = href->slot;
> > > +        }
> > > +
> > > +        ref                            = pic->refs[1][pic-
> > > >nb_refs[1] - 1];
> > > +        href                           = ref->priv_data;
> > > +        fh->ref_order_hint[href->slot] = ref->display_order -
> > > href->last_idr_frame;
> > > +        for (i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME;
> > > i++) {
> > > +            fh->ref_frame_idx[i] = href->slot;
> > > +        }
> > > +        break;
> > > +    default:
> > > +        av_assert0(0 && "invalid picture type");
> > > +    }
> > > +
> > > +    fh->show_frame                = pic->display_order <= pic-
> > > >encode_order;
> > > +    fh->showable_frame            = fh->frame_type !=
> > > AV1_FRAME_KEY;
> > > +    fh->frame_width_minus_1       = avctx->width - 1;
> > > +    fh->frame_height_minus_1      = avctx->height - 1;
> > > +    fh->render_width_minus_1      = fh->frame_width_minus_1;
> > > +    fh->render_height_minus_1     = fh->frame_height_minus_1;
> > > +    fh->order_hint                = pic->display_order - hpic-
> > > >last_idr_frame;
> > > +    fh->tile_cols                 = priv->tile_cols;
> > > +    fh->tile_rows                 = priv->tile_rows;
> > > +    fh->tile_cols_log2            = priv->tile_cols_log2;
> > > +    fh->tile_rows_log2            = priv->tile_rows_log2;
> > > +    fh->uniform_tile_spacing_flag = priv->uniform_tile;
> > > +    fh->tile_size_bytes_minus1    = priv-
> > > >attr_ext2.bits.tile_size_bytes_minus1;
> > > +    fh->reduced_tx_set            = 1;
> > > +
> > > +    /** ignore ONLY_4x4 mode for codedlossless is not fully
> > > implemented. */
> > > +    if (priv->attr_ext2.bits.tx_mode_support & 0x04)
> > > +        fh->tx_mode = AV1_TX_MODE_SELECT;
> > > +    else if (priv->attr_ext2.bits.tx_mode_support & 0x02)
> > > +        fh->tx_mode = AV1_TX_MODE_LARGEST;
> > > +    else
> > > +        return AVERROR(EINVAL);
> > > +
> > > +    for (i = 0; i < fh->tile_cols; i++)
> > > +        fh->width_in_sbs_minus_1[i] = vpic-
> > > >width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i];
> > > +
> > > +    for (i = 0; i < fh->tile_rows; i++)
> > > +        fh->height_in_sbs_minus_1[i] = vpic-
> > > >height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i];
> > > +
> > > +    memcpy(fh->loop_filter_ref_deltas,
> > > default_loop_filter_ref_deltas,
> > > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> > > +
> > > +    if (fh->frame_type == AV1_FRAME_KEY && fh->show_frame) {
> > > +        fh->error_resilient_mode = 1;
> > > +    }
> > > +
> > > +    if (fh->frame_type == AV1_FRAME_KEY || fh-
> > > >error_resilient_mode)
> > > +        fh->primary_ref_frame = AV1_PRIMARY_REF_NONE;
> > > +
> > > +    vpic->base_qindex          = fh->base_q_idx;
> > > +    vpic->frame_width_minus_1  = fh->frame_width_minus_1;
> > > +    vpic->frame_height_minus_1 = fh->frame_height_minus_1;
> > > +    vpic->primary_ref_frame    = fh->primary_ref_frame;
> > > +    vpic->reconstructed_frame  = pic->recon_surface;
> > > +    vpic->coded_buf            = pic->output_buffer;
> > > +    vpic->tile_cols            = fh->tile_cols;
> > > +    vpic->tile_rows            = fh->tile_rows;
> > > +    vpic->order_hint           = fh->order_hint;
> > > +#if VA_CHECK_VERSION(1, 15, 0)
> > > +    vpic->refresh_frame_flags  = fh->refresh_frame_flags;
> > > +#endif
> > > +
> > > +    vpic->picture_flags.bits.enable_frame_obu     = 0;
> > > +    vpic->picture_flags.bits.frame_type           = fh-
> > > >frame_type;
> > > +    vpic->picture_flags.bits.reduced_tx_set       = fh-
> > > >reduced_tx_set;
> > > +    vpic->picture_flags.bits.error_resilient_mode = fh-
> > > >error_resilient_mode;
> > > +
> > > +    /** let driver decide to use single or compound reference
> > > prediction mode. */
> > > +    vpic->mode_control_flags.bits.reference_mode = fh-
> > > >reference_select ? 2 : 0;
> > > +    vpic->mode_control_flags.bits.tx_mode = fh->tx_mode;
> > > +
> > > +    vpic->tile_group_obu_hdr_info.bits.obu_has_size_field = 1;
> > > +
> > > +    /** set reference. */
> > > +    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
> > > +        vpic->ref_frame_idx[i] = fh->ref_frame_idx[i];
> > > +
> > > +    for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
> > > +        vpic->reference_frames[i] = VA_INVALID_SURFACE;
> > > +
> > > +    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> > > +        for (int j = 0; j < pic->nb_refs[i]; j++) {
> > > +            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
> > > +
> > > +            slot = ((VAAPIEncodeAV1Picture*)ref_pic->priv_data)-
> > > >slot;
> > > +            av_assert0(vpic->reference_frames[slot] ==
> > > VA_INVALID_SURFACE);
> > > +
> > > +            vpic->reference_frames[slot] = ref_pic-
> > > >recon_surface;
> > > +        }
> > > +    }
> > > +
> > > +    /** pack frame header, and set va params offset like
> > > bit_offset_qindex etc. */
> > > +    ret = vaapi_encode_av1_write_frame_header(avctx, pic, priv-
> > > >fh_data, &priv->fh_data_len);
> > > +    if (ret < 0)
> > > +        goto end;
> > > +
> > > +end:
> > > +    ff_cbs_fragment_reset(obu);
> > > +    return ret;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_init_slice_params(AVCodecContext
> > > *avctx,
> > > +                                              VAAPIEncodePicture
> > > *pic,
> > > +                                              VAAPIEncodeSlice
> > > *slice)
> > > +{
> > > +    VAAPIEncodeAV1Context      *priv = avctx->priv_data;
> > > +    VAEncTileGroupBufferAV1  *vslice = slice-
> > > >codec_slice_params;
> > > +    CodedBitstreamAV1Context  *cbctx = priv->cbc->priv_data;
> > > +    int div;
> > > +
> > > +    /** Set tile group info. */
> > > +    div = priv->tile_cols * priv->tile_rows / priv->tile_groups;
> > > +    vslice->tg_start = slice->index * div;
> > > +    if (slice->index == (priv->tile_groups - 1)) {
> > > +        vslice->tg_end = priv->tile_cols * priv->tile_rows - 1;
> > > +        cbctx->seen_frame_header = 0;
> > > +    } else {
> > > +        vslice->tg_end = (slice->index + 1) * div - 1;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int vaapi_encode_av1_write_picture_header(AVCodecContext
> > > *avctx,
> > > +                                                 VAAPIEncodePict
> > > ure *pic,
> > > +                                                 char *data,
> > > size_t *data_len)
> > > +{
> > > +    VAAPIEncodeAV1Context     *priv = avctx->priv_data;
> > > +    CodedBitstreamFragment     *obu = &priv->current_obu;
> > > +    AV1RawOBU               *fh_obu = &priv->fh;
> > > +    AV1RawFrameHeader       *rep_fh = &fh_obu->obu.frame_header;
> > > +    VAAPIEncodeAV1Picture *href;
> > > +    int ret = 0;
> > > +
> > > +    pic->tail_size = 0;
> > > +    /** Pack repeat frame header. */
> > > +    if (pic->display_order > pic->encode_order) {
> > > +        memset(fh_obu, 0, sizeof(*fh_obu));
> > > +        href = pic->refs[0][pic->nb_refs[0] - 1]->priv_data;
> > > +        fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> > > +        fh_obu->header.obu_has_size_field = 1;
> > > +
> > > +        rep_fh->show_existing_frame   = 1;
> > > +        rep_fh->frame_to_show_map_idx = href->slot == 0;
> > > +        rep_fh->frame_type            = AV1_FRAME_INTER;
> > > +        rep_fh->frame_width_minus_1   = avctx->width - 1;
> > > +        rep_fh->frame_height_minus_1  = avctx->height - 1;
> > > +        rep_fh->render_width_minus_1  = rep_fh-
> > > >frame_width_minus_1;
> > > +        rep_fh->render_height_minus_1 = rep_fh-
> > > >frame_height_minus_1;
> > > +
> > > +        ret = vaapi_encode_av1_write_frame_header(avctx, pic,
> > > pic->tail_data, &pic->tail_size);
> > > +        if (ret < 0)
> > > +            goto end;
> > > +
> > > +        pic->tail_size /= 8;
> > > +    }
> > > +
> > > +    memcpy(data, &priv->fh_data, MAX_PARAM_BUFFER_SIZE *
> > > sizeof(char));
> > > +    *data_len = priv->fh_data_len;
> > > +
> > > +end:
> > > +    ff_cbs_fragment_reset(obu);
> > > +    return ret;
> > > +}
> > > +
> > > +static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
> > > +    { FF_PROFILE_AV1_MAIN,  8, 3, 1, 1, VAProfileAV1Profile0 },
> > > +    { FF_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
> > > +    { FF_PROFILE_UNKNOWN }
> > > +};
> > > +
> > > +static const VAAPIEncodeType vaapi_encode_type_av1 = {
> > > +    .profiles        = vaapi_encode_av1_profiles,
> > > +    .flags           = FLAG_B_PICTURES,
> > > +    .default_quality = 25,
> > > +    .configure       = &vaapi_encode_av1_configure,
> > > +
> > > +    .sequence_header_type  = VAEncPackedHeaderSequence,
> > > +    .sequence_params_size  =
> > > sizeof(VAEncSequenceParameterBufferAV1),
> > > +    .init_sequence_params  =
> > > &vaapi_encode_av1_init_sequence_params,
> > > +    .write_sequence_header =
> > > &vaapi_encode_av1_write_sequence_header,
> > > +
> > > +    .picture_priv_data_size = sizeof(VAAPIEncodeAV1Picture),
> > > +    .picture_header_type    = VAEncPackedHeaderPicture,
> > > +    .picture_params_size    =
> > > sizeof(VAEncPictureParameterBufferAV1),
> > > +    .init_picture_params    =
> > > &vaapi_encode_av1_init_picture_params,
> > > +    .write_picture_header   =
> > > &vaapi_encode_av1_write_picture_header,
> > > +
> > > +    .slice_params_size = sizeof(VAEncTileGroupBufferAV1),
> > > +    .init_slice_params = &vaapi_encode_av1_init_slice_params,
> > > +};
> > > +
> > > +static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
> > > +{
> > > +    VAAPIEncodeContext      *ctx = avctx->priv_data;
> > > +    VAAPIEncodeAV1Context  *priv = avctx->priv_data;
> > > +    VAConfigAttrib attr;
> > > +    VAStatus vas;
> > > +    int ret;
> > > +
> > > +    ctx->codec = &vaapi_encode_type_av1;
> > > +
> > > +    ctx->desired_packed_headers =
> > > +        VA_ENC_PACKED_HEADER_SEQUENCE |
> > > +        VA_ENC_PACKED_HEADER_PICTURE;
> > > +
> > > +    if (avctx->profile == FF_PROFILE_UNKNOWN)
> > > +        avctx->profile = priv->profile;
> > > +    if (avctx->level == FF_LEVEL_UNKNOWN)
> > > +        avctx->level = priv->level;
> > > +
> > > +    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level &
> > > ~0x1f) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Invalid level %d\n", avctx-
> > > >level);
> > > +        return AVERROR(EINVAL);
> > > +    }
> > > +
> > > +    ret = ff_vaapi_encode_init(avctx);
> > > +    if (ret < 0)
> > > +        return ret;
> > > +
> > > +    attr.type = VAConfigAttribEncAV1;
> > > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > > +                                ctx->va_profile,
> > > +                                ctx->va_entrypoint,
> > > +                                &attr, 1);
> > > +    if (vas != VA_STATUS_SUCCESS) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > > +               "config attribute: %d (%s).\n", vas,
> > > vaErrorStr(vas));
> > > +        return AVERROR_EXTERNAL;
> > > +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> > > +        priv->attr.value = 0;
> > > +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not
> > > "
> > > +               "supported.\n", attr.type);
> > > +    } else {
> > > +        priv->attr.value = attr.value;
> > > +    }
> > > +
> > > +    attr.type = VAConfigAttribEncAV1Ext1;
> > > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > > +                                ctx->va_profile,
> > > +                                ctx->va_entrypoint,
> > > +                                &attr, 1);
> > > +    if (vas != VA_STATUS_SUCCESS) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > > +               "config attribute: %d (%s).\n", vas,
> > > vaErrorStr(vas));
> > > +        return AVERROR_EXTERNAL;
> > > +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> > > +        priv->attr_ext1.value = 0;
> > > +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not
> > > "
> > > +               "supported.\n", attr.type);
> > > +    } else {
> > > +        priv->attr_ext1.value = attr.value;
> > > +    }
> > > +
> > > +    /** This attr provides essential indicators, return error if
> > > not support. */
> > > +    attr.type = VAConfigAttribEncAV1Ext2;
> > > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > > +                                ctx->va_profile,
> > > +                                ctx->va_entrypoint,
> > > +                                &attr, 1);
> > > +    if (vas != VA_STATUS_SUCCESS || attr.value ==
> > > VA_ATTRIB_NOT_SUPPORTED) {
> > > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > > +               "config attribute: %d (%s).\n", vas,
> > > vaErrorStr(vas));
> > > +        return AVERROR_EXTERNAL;
> > > +    } else {
> > > +        priv->attr_ext2.value = attr.value;
> > > +    }
> > > +
> > > +    ret = vaapi_encode_av1_set_tile(avctx);
> > > +    if (ret < 0)
> > > +        return ret;
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static av_cold int vaapi_encode_av1_close(AVCodecContext *avctx)
> > > +{
> > > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > > +
> > > +    ff_cbs_fragment_free(&priv->current_obu);
> > > +    ff_cbs_close(&priv->cbc);
> > > +
> > > +    return ff_vaapi_encode_close(avctx);
> > > +}
> > > +
> > > +#define OFFSET(x) offsetof(VAAPIEncodeAV1Context, x)
> > > +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM |
> > > AV_OPT_FLAG_ENCODING_PARAM)
> > > +
> > > +static const AVOption vaapi_encode_av1_options[] = {
> > > +    VAAPI_ENCODE_COMMON_OPTIONS,
> > > +    VAAPI_ENCODE_RC_OPTIONS,
> > > +    { "profile", "Set profile (seq_profile)",
> > > +      OFFSET(profile), AV_OPT_TYPE_INT,
> > > +      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff,
> > > FLAGS, "profile" },
> > > +
> > > +#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST,
> > > \
> > > +    { .i64 = value }, 0, 0, FLAGS, "profile"
> > > +    { PROFILE("main",               FF_PROFILE_AV1_MAIN) },
> > > +    { PROFILE("high",               FF_PROFILE_AV1_HIGH) },
> > > +    { PROFILE("professional",       FF_PROFILE_AV1_PROFESSIONAL)
> > > },
> > > +#undef PROFILE
> > > +
> > > +    { "tier", "Set tier (seq_tier)",
> > > +      OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS,
> > > "tier" },
> > > +    { "main", NULL, 0, AV_OPT_TYPE_CONST,
> > > +      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
> > > +    { "high", NULL, 0, AV_OPT_TYPE_CONST,
> > > +      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
> > > +    { "level", "Set level (seq_level_idx)",
> > > +      OFFSET(level), AV_OPT_TYPE_INT,
> > > +      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0x1f,
> > > FLAGS, "level" },
> > > +
> > > +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
> > > +      { .i64 = value }, 0, 0, FLAGS, "level"
> > > +    { LEVEL("2.0",  0) },
> > > +    { LEVEL("2.1",  1) },
> > > +    { LEVEL("3.0",  4) },
> > > +    { LEVEL("3.1",  5) },
> > > +    { LEVEL("4.0",  8) },
> > > +    { LEVEL("4.1",  9) },
> > > +    { LEVEL("5.0", 12) },
> > > +    { LEVEL("5.1", 13) },
> > > +    { LEVEL("5.2", 14) },
> > > +    { LEVEL("5.3", 15) },
> > > +    { LEVEL("6.0", 16) },
> > > +    { LEVEL("6.1", 17) },
> > > +    { LEVEL("6.2", 18) },
> > > +    { LEVEL("6.3", 19) },
> > > +#undef LEVEL
> > > +
> > > +    { "tiles", "Tile columns x rows",
> > > +      OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1x1"
> > > }, 1, AV1_MAX_TILE_COLS, FLAGS },
> > > +    { "tile_groups", "Number of tile groups for encoding",
> > > +      OFFSET(tile_groups), AV_OPT_TYPE_INT, { .i64 = 1 }, 1,
> > > AV1_MAX_TILE_ROWS * AV1_MAX_TILE_COLS, FLAGS },
> > > +
> > > +    { NULL },
> > > +};
> > > +
> > > +static const FFCodecDefault vaapi_encode_av1_defaults[] = {
> > > +    { "b",              "0"   },
> > > +    { "bf",             "7"   },
> > > +    { "g",              "120" },
> > > +    { "qmin",           "1"   },
> > > +    { "qmax",           "255" },
> > > +    { NULL },
> > > +};
> > > +
> > > +static const AVClass vaapi_encode_av1_class = {
> > > +    .class_name = "av1_vaapi",
> > > +    .item_name  = av_default_item_name,
> > > +    .option     = vaapi_encode_av1_options,
> > > +    .version    = LIBAVUTIL_VERSION_INT,
> > > +};
> > > +
> > > +const FFCodec ff_av1_vaapi_encoder = {
> > > +    .p.name         = "av1_vaapi",
> > > +    CODEC_LONG_NAME("AV1 (VAAPI)"),
> > > +    .p.type         = AVMEDIA_TYPE_VIDEO,
> > > +    .p.id           = AV_CODEC_ID_AV1,
> > > +    .priv_data_size = sizeof(VAAPIEncodeAV1Context),
> > > +    .init           = &vaapi_encode_av1_init,
> > > +    FF_CODEC_RECEIVE_PACKET_CB(&ff_vaapi_encode_receive_packet),
> > > +    .close          = &vaapi_encode_av1_close,
> > > +    .p.priv_class   = &vaapi_encode_av1_class,
> > > +    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE
> > > |
> > > +                      AV_CODEC_CAP_DR1 |
> > > AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
> > > +    .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
> > > +                      FF_CODEC_CAP_INIT_CLEANUP,
> > > +    .defaults       = vaapi_encode_av1_defaults,
> > > +    .p.pix_fmts = (const enum AVPixelFormat[]) {
> > > +        AV_PIX_FMT_VAAPI,
> > > +        AV_PIX_FMT_NONE,
> > > +    },
> > > +    .hw_configs     = ff_vaapi_encode_hw_configs,
> > > +    .p.wrapper_name = "vaapi",
> > > +};
> > > --
> > > 2.25.1
> > > 
> > 
> > Seems sensible, but your sender address got rewritten. You might
> > want
> > to add an additional
> > 
> > From: Fei Wang <fei.w.wang@intel.com>

This field has been automatically added when I used git email-send
like:

root@fei:~ffmpeg# git send-email -1 --subject-prefix="FFmpeg-
devel][PATCH v1" --annotate --to fei.w.wang@intel.com

From bfe7d8b4d00b209b30c4992b0fc6fecf0e8ec1ea Mon Sep 17 00:00:00 2001
From: Fei Wang <fei.w.wang@intel.com>
Date: Tue, 1 Aug 2023 09:11:14 +0800
Subject: [FFmpeg-devel][PATCH v1] test

This is a test patch.

Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
 libavcodec/vaapi_encode.c      | 61 ++++++++++++++++++++++++++--------
 libavcodec/vaapi_encode.h      |  4 +++
 libavcodec/vaapi_encode_h264.c | 47 ++++++++++++++++++++------
 3 files changed, 89 insertions(+), 23 deletions(-)

From field message looks good when I send the mail to all other
personal account, but will be rewritten when send it to ffmpeg-devel.


Thanks

> > 
> > at the top of your commit message body if you can't get the "From"
> > field in the emails to be set properly when you send to the list.
> > 
> > Otherwise the patch series LGTM. I don't have any hardware to
> > exercise
> > this code with though, so that makes reviewing its behavior a
> > little
> > tough.
> > 
> > Regardless, I can at least ack it.
> > 
> > Acked-by: Neal Gompa <ngompa13@gmail.com>
> > 
> 
> Oops, forgot to actually CC you for this.
> 
> 
>
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index 3876082844..7ae9b85d52 100644
--- a/Changelog
+++ b/Changelog
@@ -25,6 +25,7 @@  version <next>:
 - Raw VVC bitstream parser, muxer and demuxer
 - Bitstream filter for editing metadata in VVC streams
 - Bitstream filter for converting VVC from MP4 to Annex B
+- VAAPI AV1 encoder
 
 version 6.0:
 - Radiance HDR image support
diff --git a/configure b/configure
index b018abf139..fbc729b5c6 100755
--- a/configure
+++ b/configure
@@ -3323,6 +3323,8 @@  av1_qsv_decoder_select="qsvdec"
 av1_qsv_encoder_select="qsvenc"
 av1_qsv_encoder_deps="libvpl"
 av1_amf_encoder_deps="amf"
+av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1"
+av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
 
 # parsers
 aac_parser_select="adts_header mpeg4audio"
@@ -7104,6 +7106,7 @@  if enabled vaapi; then
     check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
     check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
     check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
+    check_type "va/va.h va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
 fi
 
 if enabled_all opencl libdrm ; then
diff --git a/doc/encoders.texi b/doc/encoders.texi
index 25d6b7f09e..fb331ebd8e 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3991,6 +3991,19 @@  Average variable bitrate.
 Each encoder also has its own specific options:
 @table @option
 
+@item av1_vaapi
+@option{profile} sets the value of @emph{seq_profile}.
+@option{tier} sets the value of @emph{seq_tier}.
+@option{level} sets the value of @emph{seq_level_idx}.
+
+@table @option
+@item tiles
+Set the number of tiles to encode the input video with, as columns x rows.
+(default is 1x1).
+@item tile_groups
+Set tile groups number (default is 1).
+@end table
+
 @item h264_vaapi
 @option{profile} sets the value of @emph{profile_idc} and the @emph{constraint_set*_flag}s.
 @option{level} sets the value of @emph{level_idc}.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3cd5997e64..fe1e6aa99d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -259,6 +259,7 @@  OBJS-$(CONFIG_AV1_MEDIACODEC_DECODER)  += mediacodecdec.o
 OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER)  += mediacodecenc.o
 OBJS-$(CONFIG_AV1_NVENC_ENCODER)       += nvenc_av1.o nvenc.o
 OBJS-$(CONFIG_AV1_QSV_ENCODER)         += qsvenc_av1.o
+OBJS-$(CONFIG_AV1_VAAPI_ENCODER)       += vaapi_encode_av1.o av1_profile_level.o
 OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o
 OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
 OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 8775d15a4f..c43c1d7b48 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -844,6 +844,7 @@  extern const FFCodec ff_av1_nvenc_encoder;
 extern const FFCodec ff_av1_qsv_decoder;
 extern const FFCodec ff_av1_qsv_encoder;
 extern const FFCodec ff_av1_amf_encoder;
+extern const FFCodec ff_av1_vaapi_encoder;
 extern const FFCodec ff_libopenh264_encoder;
 extern const FFCodec ff_libopenh264_decoder;
 extern const FFCodec ff_h264_amf_encoder;
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 2604f12b9e..2907e159fb 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -669,6 +669,15 @@  static int vaapi_encode_set_output_timestamp(AVCodecContext *avctx,
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
 
+    // AV1 packs P frame and next B frame into one pkt, and uses the other
+    // repeat frame header pkt at the display order position of the P frame
+    // to indicate its frame index. Each frame has a corresponding pkt in its
+    // display order position. So don't need to consider delay for AV1 timestamp.
+    if (avctx->codec_id == AV_CODEC_ID_AV1) {
+        pkt->dts = pkt->pts - ctx->dts_pts_diff;
+        return 0;
+    }
+
     if (ctx->output_delay == 0) {
         pkt->dts = pkt->pts;
     } else if (pic->encode_order < ctx->decode_delay) {
@@ -689,9 +698,10 @@  static int vaapi_encode_output(AVCodecContext *avctx,
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
     VACodedBufferSegment *buf_list, *buf;
-    VAStatus vas;
+    AVPacket *pkt_ptr = pkt;
     int total_size = 0;
     uint8_t *ptr;
+    VAStatus vas;
     int err;
 
     err = vaapi_encode_wait(avctx, pic);
@@ -711,11 +721,52 @@  static int vaapi_encode_output(AVCodecContext *avctx,
     for (buf = buf_list; buf; buf = buf->next)
         total_size += buf->size;
 
-    err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
-    ptr = pkt->data;
+    /** repack av1 coded frame for not display and repeat frames */
+    if (avctx->codec_id == AV_CODEC_ID_AV1) {
+        int display_frame = pic->display_order <= pic->encode_order;
 
-    if (err < 0)
-        goto fail_mapped;
+        if (display_frame) {
+            total_size += ctx->header_data_size;
+            err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
+            if (err < 0)
+                goto fail_mapped;
+            ptr = pkt->data;
+
+            if (ctx->header_data_size) {
+                memcpy(ptr, ctx->header_data, ctx->header_data_size);
+                ptr += ctx->header_data_size;
+                ctx->header_data_size = 0;
+            }
+        } else {
+            ctx->header_data = av_realloc(ctx->header_data, total_size);
+            if (!ctx->header_data) {
+                err = AVERROR(ENOMEM);
+                goto fail_mapped;
+            }
+            ptr = ctx->header_data;
+            ctx->header_data_size = total_size;
+
+            if (pic->tail_size) {
+                if (ctx->tail_pkt->size) {
+                    err = AVERROR(AVERROR_BUG);
+                    goto fail_mapped;
+                }
+
+                err = ff_get_encode_buffer(avctx, ctx->tail_pkt, pic->tail_size, 0);
+                if (err < 0)
+                    goto fail_mapped;
+
+                memcpy(ctx->tail_pkt->data, pic->tail_data, pic->tail_size);
+                pkt_ptr = ctx->tail_pkt;
+            }
+        }
+    } else {
+        err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
+        ptr = pkt->data;
+
+        if (err < 0)
+            goto fail_mapped;
+    }
 
     for (buf = buf_list; buf; buf = buf->next) {
         av_log(avctx, AV_LOG_DEBUG, "Output buffer: %u bytes "
@@ -726,10 +777,10 @@  static int vaapi_encode_output(AVCodecContext *avctx,
     }
 
     if (pic->type == PICTURE_TYPE_IDR)
-        pkt->flags |= AV_PKT_FLAG_KEY;
+        pkt_ptr->flags |= AV_PKT_FLAG_KEY;
 
-    pkt->pts = pic->pts;
-    pkt->duration = pic->duration;
+    pkt_ptr->pts = pic->pts;
+    pkt_ptr->duration = pic->duration;
 
     vas = vaUnmapBuffer(ctx->hwctx->display, pic->output_buffer);
     if (vas != VA_STATUS_SUCCESS) {
@@ -742,8 +793,8 @@  static int vaapi_encode_output(AVCodecContext *avctx,
     // for no-delay encoders this is handled in generic codec
     if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
         avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
-        pkt->opaque     = pic->opaque;
-        pkt->opaque_ref = pic->opaque_ref;
+        pkt_ptr->opaque     = pic->opaque;
+        pkt_ptr->opaque_ref = pic->opaque_ref;
         pic->opaque_ref = NULL;
     }
 
@@ -752,6 +803,9 @@  static int vaapi_encode_output(AVCodecContext *avctx,
 
     av_log(avctx, AV_LOG_DEBUG, "Output read for pic %"PRId64"/%"PRId64".\n",
            pic->display_order, pic->encode_order);
+
+    vaapi_encode_set_output_timestamp(avctx, pic, pkt_ptr);
+
     return 0;
 
 fail_mapped:
@@ -1128,9 +1182,19 @@  static int vaapi_encode_pick_next(AVCodecContext *avctx,
 
     vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
     if (pic->type != PICTURE_TYPE_IDR) {
-        vaapi_encode_add_ref(avctx, pic, start,
-                             pic->type == PICTURE_TYPE_P,
-                             b_counter > 0, 0);
+        // TODO: apply both previous and forward multi reference for all vaapi encoders.
+        // And L0/L1 reference frame number can be set dynamically through query
+        // VAConfigAttribEncMaxRefFrames attribute.
+        if (avctx->codec_id == AV_CODEC_ID_AV1) {
+            for (i = 0; i < ctx->nb_next_prev; i++)
+                vaapi_encode_add_ref(avctx, pic, ctx->next_prev[i],
+                                     pic->type == PICTURE_TYPE_P,
+                                     b_counter > 0, 0);
+        } else
+            vaapi_encode_add_ref(avctx, pic, start,
+                                 pic->type == PICTURE_TYPE_P,
+                                 b_counter > 0, 0);
+
         vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx->nb_next_prev - 1], 0, 0, 1);
     }
 
@@ -1292,6 +1356,19 @@  int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
     AVFrame *frame = ctx->frame;
     int err;
 
+start:
+    /** if no B frame before repeat P frame, sent repeat P frame out. */
+    if (avctx->codec_id == AV_CODEC_ID_AV1 && ctx->tail_pkt->size) {
+        for (VAAPIEncodePicture *tmp = ctx->pic_start; tmp; tmp = tmp->next) {
+            if (tmp->type == PICTURE_TYPE_B && tmp->pts < ctx->tail_pkt->pts)
+                break;
+            else if (!tmp->next) {
+                av_packet_move_ref(pkt, ctx->tail_pkt);
+                goto end;
+            }
+        }
+    }
+
     err = ff_encode_get_frame(avctx, frame);
     if (err < 0 && err != AVERROR_EOF)
         return err;
@@ -1356,17 +1433,21 @@  int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
         return err;
     }
 
-    vaapi_encode_set_output_timestamp(avctx, pic, pkt);
-    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
-           "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
-
     ctx->output_order = pic->encode_order;
     vaapi_encode_clear_old(avctx);
 
+    /** loop to get an available pkt in encoder flushing. */
+    if (ctx->end_of_stream && !pkt->size)
+        goto start;
+
+end:
+    if (pkt->size)
+        av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
+               "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
+
     return 0;
 }
 
-
 static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx, int type,
                                                   void *buffer, size_t size)
 {
@@ -2667,6 +2748,12 @@  av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
     ctx->hwctx = ctx->device->hwctx;
 
+    ctx->tail_pkt = av_packet_alloc();
+    if (!ctx->tail_pkt) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
     err = vaapi_encode_profile_entrypoint(avctx);
     if (err < 0)
         goto fail;
@@ -2859,9 +2946,11 @@  av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
     }
 
     av_frame_free(&ctx->frame);
+    av_packet_free(&ctx->tail_pkt);
 
     av_freep(&ctx->codec_sequence_params);
     av_freep(&ctx->codec_picture_params);
+    av_freep(&ctx->header_data);
     av_fifo_freep2(&ctx->encode_fifo);
 
     av_buffer_unref(&ctx->recon_frames_ref);
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index d5452a37b3..03df8d6d46 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -133,6 +133,11 @@  typedef struct VAAPIEncodePicture {
 
     int          nb_slices;
     VAAPIEncodeSlice *slices;
+
+    /** Tail data of current pic, used only for repeat header of AV1. */
+    char tail_data[MAX_PARAM_BUFFER_SIZE];
+    /** Byte length of tail_data. */
+    size_t tail_size;
 } VAAPIEncodePicture;
 
 typedef struct VAAPIEncodeProfile {
@@ -367,6 +372,13 @@  typedef struct VAAPIEncodeContext {
     AVFifo          *encode_fifo;
     // Max number of frame buffered in encoder.
     int             async_depth;
+
+    /** Head data for current output pkt, used only for AV1. */
+    void  *header_data;
+    size_t header_data_size;
+
+    /** Store av1 repeat frame header pkt. */
+    AVPacket *tail_pkt;
 } VAAPIEncodeContext;
 
 enum {
diff --git a/libavcodec/vaapi_encode_av1.c b/libavcodec/vaapi_encode_av1.c
new file mode 100644
index 0000000000..778c5b35c7
--- /dev/null
+++ b/libavcodec/vaapi_encode_av1.c
@@ -0,0 +1,1229 @@ 
+/*
+ * Copyright (c) 2023 Intel Corporation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+#include <va/va_enc_av1.h>
+
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+
+#include "cbs_av1.h"
+#include "put_bits.h"
+#include "codec_internal.h"
+#include "av1_profile_level.h"
+#include "vaapi_encode.h"
+
+#define AV1_MAX_QUANT 255
+
+typedef struct VAAPIEncodeAV1Picture {
+    int64_t last_idr_frame;
+    int slot;
+} VAAPIEncodeAV1Picture;
+
+typedef struct VAAPIEncodeAV1Context {
+    VAAPIEncodeContext common;
+    AV1RawOBU sh; /**< sequence header.*/
+    AV1RawOBU fh; /**< frame header.*/
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_obu;
+    VAConfigAttribValEncAV1 attr;
+    VAConfigAttribValEncAV1Ext1 attr_ext1;
+    VAConfigAttribValEncAV1Ext2 attr_ext2;
+
+    char sh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded sequence header data. */
+    size_t sh_data_len; /**< bit length of sh_data. */
+    char fh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded frame header data. */
+    size_t fh_data_len; /**< bit length of fh_data. */
+
+    uint8_t uniform_tile;
+    uint8_t use_128x128_superblock;
+    int sb_cols;
+    int sb_rows;
+    int tile_cols_log2;
+    int tile_rows_log2;
+    int max_tile_width_sb;
+    int max_tile_height_sb;
+    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
+    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
+
+    int min_log2_tile_cols;
+    int max_log2_tile_cols;
+    int min_log2_tile_rows;
+    int max_log2_tile_rows;
+
+    int q_idx_idr;
+    int q_idx_p;
+    int q_idx_b;
+
+    /** user options */
+    int profile;
+    int level;
+    int tier;
+    int tile_cols, tile_rows;
+    int tile_groups;
+} VAAPIEncodeAV1Context;
+
+static av_cold int vaapi_encode_av1_configure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext     *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+    int ret;
+
+    ret = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, avctx);
+    if (ret < 0)
+        return ret;
+
+    if (ctx->rc_mode->quality) {
+        priv->q_idx_p = av_clip(ctx->rc_quality, 0, AV1_MAX_QUANT);
+        if (fabs(avctx->i_quant_factor) > 0.0)
+            priv->q_idx_idr =
+                av_clip((fabs(avctx->i_quant_factor) * priv->q_idx_p  +
+                         avctx->i_quant_offset) + 0.5,
+                        0, AV1_MAX_QUANT);
+        else
+            priv->q_idx_idr = priv->q_idx_p;
+
+        if (fabs(avctx->b_quant_factor) > 0.0)
+            priv->q_idx_b =
+                av_clip((fabs(avctx->b_quant_factor) * priv->q_idx_p  +
+                         avctx->b_quant_offset) + 0.5,
+                        0, AV1_MAX_QUANT);
+        else
+            priv->q_idx_b = priv->q_idx_p;
+    } else {
+        /** Arbitrary value */
+        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128;
+    }
+
+    return 0;
+}
+
+static int vaapi_encode_av1_add_obu(AVCodecContext *avctx,
+                                    CodedBitstreamFragment *au,
+                                    uint8_t type,
+                                    void *obu_unit)
+{
+    int ret;
+
+    ret = ff_cbs_insert_unit_content(au, -1,
+                                     type, obu_unit, NULL);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: "
+               "type = %d.\n", type);
+        return ret;
+    }
+
+    return 0;
+}
+
+static int vaapi_encode_av1_write_obu(AVCodecContext *avctx,
+                                      char *data, size_t *data_len,
+                                      CodedBitstreamFragment *bs)
+{
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+    int ret;
+
+    ret = ff_cbs_write_fragment_data(priv->cbc, bs);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
+        return ret;
+    }
+
+    if ((size_t)8 * MAX_PARAM_BUFFER_SIZE < 8 * bs->data_size - bs->data_bit_padding) {
+        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
+               "%zu < %zu.\n", (size_t)8 * MAX_PARAM_BUFFER_SIZE,
+               8 * bs->data_size - bs->data_bit_padding);
+        return AVERROR(ENOSPC);
+    }
+
+    memcpy(data, bs->data, bs->data_size);
+    *data_len = 8 * bs->data_size - bs->data_bit_padding;
+
+    return 0;
+}
+
+static int get_relative_dist(const AV1RawSequenceHeader *seq,
+                             unsigned int a, unsigned int b)
+{
+    unsigned int diff, m;
+    if (!seq->enable_order_hint)
+        return 0;
+    diff = a - b;
+    m = 1 << seq->order_hint_bits_minus_1;
+    diff = (diff & (m - 1)) - (diff & m);
+    return diff;
+}
+
+static int write_ns(PutBitContext *pbc, uint32_t n, uint32_t value)
+{
+    uint32_t w, m, v, extra_bit;
+
+    w = av_log2(n) + 1;
+    m = (1 << w) - n;
+
+    if (value < m) {
+        v = value;
+        put_bits(pbc, w - 1, v);
+    } else {
+        v = m + ((value - m) >> 1);
+        extra_bit = (value - m) & 1;
+        put_bits(pbc, w - 1, v);
+        put_bits(pbc, 1, extra_bit);
+    }
+
+    return 0;
+}
+
+/**
+ * This API provide the minmum implemention according current enabled features
+ * in frame header. If more features will enable in furture, please make sure
+ * the relative flags of features should be packed correctly into frame header
+ * obu in this API.
+ */
+static int vaapi_encode_av1_write_frame_header(AVCodecContext *avctx,
+                                               VAAPIEncodePicture *pic,
+                                               char *data, size_t *data_len)
+{
+    VAAPIEncodeContext              *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
+    AV1RawOBU                    *fh_obu = &priv->fh;
+    AV1RawOBU                    *sh_obu = &priv->sh;
+    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
+    AV1RawSequenceHeader             *sh = &sh_obu->obu.sequence_header;
+    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
+    PutBitContext pbc, pbc_tmp;
+    uint8_t byte;
+    int qindex, coded_lossless;
+    int id_len, frame_is_intra, skip_mode_allowed;
+    int start, payload_bits, obu_size, obu_size_len;
+    int qindex_offset, loopfilter_offset;
+    int cdef_start_offset, cdef_end_offset;
+    int i;
+
+    init_put_bits(&pbc, data, MAX_PARAM_BUFFER_SIZE);
+
+    /** obu header */
+    put_bits(&pbc, 1, fh_obu->header.obu_forbidden_bit);
+    put_bits(&pbc, 4, fh_obu->header.obu_type);
+    put_bits(&pbc, 1, fh_obu->header.obu_extension_flag);
+    put_bits(&pbc, 1, fh_obu->header.obu_has_size_field);
+    put_bits(&pbc, 1, fh_obu->header.obu_reserved_1bit);
+
+    /** record pbc status to re-write obu size later. */
+    if (fh_obu->header.obu_has_size_field) {
+        pbc_tmp = pbc;
+        put_bits32(&pbc, 0);
+        put_bits32(&pbc, 0);
+    }
+
+    start = put_bits_count(&pbc);
+
+    /** uncompressed_header() */
+    if (sh->frame_id_numbers_present_flag)
+        id_len = sh->additional_frame_id_length_minus_1 +
+                 sh->delta_frame_id_length_minus_2 + 3;
+
+    frame_is_intra = (fh->frame_type == AV1_FRAME_KEY ||
+                      fh->frame_type == AV1_FRAME_INTRA_ONLY);
+    if (!sh->reduced_still_picture_header) {
+        put_bits(&pbc, 1, fh->show_existing_frame);
+        if (fh->show_existing_frame) {
+            put_bits(&pbc, 3, fh->frame_to_show_map_idx);
+            goto trailing_bits;
+        }
+        if (sh->frame_id_numbers_present_flag)
+            put_bits(&pbc, id_len, fh->display_frame_id);
+
+        put_bits(&pbc, 2, fh->frame_type);
+        put_bits(&pbc, 1, fh->show_frame);
+        if (!fh->show_frame)
+            put_bits(&pbc, 1, fh->showable_frame);
+
+        if (!(fh->frame_type == AV1_FRAME_SWITCH ||
+            (fh->frame_type == AV1_FRAME_KEY && fh->show_frame)))
+            put_bits(&pbc, 1, fh->error_resilient_mode);
+    }
+
+    put_bits(&pbc, 1, fh->disable_cdf_update);
+    if (sh->seq_force_screen_content_tools == AV1_SELECT_SCREEN_CONTENT_TOOLS)
+        put_bits(&pbc, 1, fh->allow_screen_content_tools);
+
+    if (fh->allow_screen_content_tools && sh->seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
+        put_bits(&pbc, 1, fh->force_integer_mv);
+
+    if (sh->frame_id_numbers_present_flag)
+        put_bits(&pbc, id_len, fh->current_frame_id);
+
+    if (fh->frame_type != AV1_FRAME_SWITCH && !sh->reduced_still_picture_header)
+        put_bits(&pbc, 1, fh->frame_size_override_flag);
+
+    if (sh->enable_order_hint)
+        put_bits(&pbc, sh->order_hint_bits_minus_1 + 1, fh->order_hint);
+
+    if (!(frame_is_intra || fh->error_resilient_mode))
+        put_bits(&pbc, 3, fh->primary_ref_frame);
+
+    if (!(fh->frame_type == AV1_FRAME_SWITCH ||
+        fh->frame_type == AV1_FRAME_KEY && fh->show_frame))
+        put_bits(&pbc, 8, fh->refresh_frame_flags);
+
+    if (frame_is_intra) {
+        /** render_size() */
+         put_bits(&pbc, 1, fh->render_and_frame_size_different);
+    } else {
+        if (!frame_is_intra && sh->enable_order_hint)
+            put_bits(&pbc, 1, fh->frame_refs_short_signaling);
+
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            if (!fh->frame_refs_short_signaling)
+                put_bits(&pbc, 3, fh->ref_frame_idx[i]);
+        }
+
+        if (!(fh->frame_size_override_flag && !fh->error_resilient_mode))
+            put_bits(&pbc, 1, fh->render_and_frame_size_different);
+
+        if (!fh->force_integer_mv)
+            put_bits(&pbc, 1, fh->allow_high_precision_mv);
+
+        /** read_interpolation_filter() */
+        put_bits(&pbc, 1, fh->is_filter_switchable);
+        if (!fh->is_filter_switchable)
+            put_bits(&pbc, 2, fh->interpolation_filter);
+
+        put_bits(&pbc, 1, fh->is_motion_mode_switchable);
+    }
+
+    if (!(sh->reduced_still_picture_header || fh->disable_cdf_update))
+        put_bits(&pbc, 1, fh->disable_frame_end_update_cdf);
+
+    /** tile_info() */
+    put_bits(&pbc, 1, fh->uniform_tile_spacing_flag);
+    if (fh->uniform_tile_spacing_flag) {
+        for (i = 0; i < priv->tile_cols_log2 - priv->min_log2_tile_cols; i++) {
+            put_bits(&pbc, 1, 1);
+        }
+        if (priv->tile_cols_log2 < priv->max_log2_tile_cols)
+            put_bits(&pbc, 1, 0);
+
+        for (i = 0; i < priv->tile_rows_log2 - priv->min_log2_tile_rows; i++) {
+            put_bits(&pbc, 1, 1);
+        }
+        if (priv->tile_rows_log2 < priv->max_log2_tile_rows)
+            put_bits(&pbc, 1, 0);
+    } else {
+        int start_sb = 0;
+        int max_width, max_height;
+        for (i = 0; start_sb < priv->sb_cols; i++) {
+            max_width = FFMIN(priv->sb_cols - start_sb, priv->max_tile_width_sb);
+            write_ns(&pbc, max_width, fh->width_in_sbs_minus_1[i]);
+            start_sb += fh->width_in_sbs_minus_1[i] + 1;
+        }
+
+        start_sb = 0;
+        for (i = 0; start_sb < priv->sb_rows; i++) {
+            max_height = FFMIN(priv->sb_rows - start_sb, priv->max_tile_height_sb);
+            write_ns(&pbc, max_height, fh->height_in_sbs_minus_1[i]);
+            start_sb += fh->height_in_sbs_minus_1[i] + 1;
+        }
+    }
+
+    if (priv->tile_cols_log2 || priv->tile_rows_log2) {
+        put_bits(&pbc, priv->tile_cols_log2 + priv->tile_rows_log2, fh->context_update_tile_id);
+        put_bits(&pbc, 2, fh->tile_size_bytes_minus1);
+    }
+
+    qindex_offset = put_bits_count(&pbc);
+    /** quantization_params() */
+    put_bits(&pbc, 8, fh->base_q_idx);
+    put_bits(&pbc, 1, fh->delta_q_y_dc);
+    put_bits(&pbc, 1, fh->delta_q_u_dc);
+    put_bits(&pbc, 1, fh->delta_q_u_ac);
+    put_bits(&pbc, 1, fh->using_qmatrix);
+
+    /** segmentation_params() */
+    put_bits(&pbc, 1, fh->segmentation_enabled);
+
+    /** delta_q_params() */
+    if (fh->base_q_idx)
+        put_bits(&pbc, 1, fh->delta_q_present);
+    if (fh->delta_q_present)
+        put_bits(&pbc, 2, fh->delta_q_res);
+
+    /** delta_lf_params() */
+    if (fh->delta_q_present) {
+        if (!fh->allow_intrabc)
+            put_bits(&pbc, 1, fh->delta_lf_present);
+        if (fh->delta_lf_present) {
+            put_bits(&pbc, 2, fh->delta_lf_res);
+            put_bits(&pbc, 1, fh->delta_lf_multi);
+        }
+    }
+
+    /** codelossless */
+    coded_lossless = 1;
+    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+        if (fh->segmentation_enabled && fh->feature_enabled[i][AV1_SEG_LVL_ALT_Q])
+            qindex = fh->base_q_idx + fh->feature_value[i][AV1_SEG_LVL_ALT_Q];
+        else
+            qindex = fh->base_q_idx;
+        qindex = av_clip_uintp2(qindex, 8);
+
+        if (qindex || fh->delta_q_y_dc || fh->delta_q_u_ac || fh->delta_q_u_dc ||
+            fh->delta_q_v_ac || fh->delta_q_v_dc)
+            coded_lossless = 0;
+    }
+
+    loopfilter_offset = put_bits_count(&pbc);
+    /** loop_filter_params() */
+    if (!(coded_lossless || fh->allow_intrabc)) {
+        put_bits(&pbc, 6, fh->loop_filter_level[0]);
+        put_bits(&pbc, 6, fh->loop_filter_level[1]);
+        if (fh->loop_filter_level[0] || fh->loop_filter_level[1]) {
+            put_bits(&pbc, 6, fh->loop_filter_level[2]);
+            put_bits(&pbc, 6, fh->loop_filter_level[3]);
+        }
+        put_bits(&pbc, 3, fh->loop_filter_sharpness);
+        put_bits(&pbc, 1, fh->loop_filter_delta_enabled);
+    }
+
+    cdef_start_offset = put_bits_count(&pbc);
+    /** cdef_params() */
+    if (!(coded_lossless || fh->allow_intrabc || !sh->enable_cdef)) {
+        put_bits(&pbc, 2, fh->cdef_damping_minus_3);
+        put_bits(&pbc, 2, fh->cdef_bits);
+        for (i = 0; i < (1 << fh->cdef_bits); i++) {
+            put_bits(&pbc, 4, fh->cdef_y_pri_strength[i]);
+            put_bits(&pbc, 2, fh->cdef_y_sec_strength[i]);
+            put_bits(&pbc, 4, fh->cdef_uv_pri_strength[i]);
+            put_bits(&pbc, 2, fh->cdef_uv_sec_strength[i]);
+        }
+    }
+    cdef_end_offset = put_bits_count(&pbc);
+
+    /** read_tx_mode() */
+    if (fh->tx_mode == AV1_TX_MODE_SELECT)
+        put_bits(&pbc, 1, 1);
+    else
+        put_bits(&pbc, 1, 0);
+
+    /** frame_reference_mode() */
+    if (!frame_is_intra)
+        put_bits(&pbc, 1, fh->reference_select);
+
+    /** skip_mode_params() */
+    if (frame_is_intra || !fh->reference_select || !sh->enable_order_hint)
+        skip_mode_allowed = 0;
+    else {
+        int forward_idx, backward_idx;
+        int ref_hint, forward_hint, backward_hint;
+
+        forward_idx = -1;
+        backward_idx = -1;
+
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
+            if (get_relative_dist(sh, ref_hint, fh->order_hint) < 0) {
+                if (forward_idx < 0 || get_relative_dist(sh, ref_hint, forward_idx) > 0) {
+                    forward_idx = i;
+                    forward_hint = ref_hint;
+                }
+            } else if (get_relative_dist(sh, ref_hint, fh->order_hint) > 0) {
+                if (backward_idx < 0 || get_relative_dist(sh, ref_hint, backward_hint) < 0) {
+                    backward_idx = i;
+                    backward_hint = ref_hint;
+                }
+            }
+        }
+        if (forward_idx < 0)
+            skip_mode_allowed = 0;
+        else if (backward_idx >= 0)
+            skip_mode_allowed = 1;
+        else {
+            int second_forward_idx, second_forward_hint;
+            second_forward_idx = -1;
+            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+                ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
+                if (get_relative_dist(sh, ref_hint, forward_hint) < 0) {
+                    if (second_forward_idx < 0 || get_relative_dist(sh, ref_hint, second_forward_hint) > 0){
+                        second_forward_idx = i;
+                        second_forward_hint = ref_hint;
+                    }
+                }
+            }
+            if (second_forward_idx < 0)
+                skip_mode_allowed = 0;
+            else
+                skip_mode_allowed = 1;
+        }
+    }
+
+    if (skip_mode_allowed)
+        put_bits(&pbc, 1, fh->skip_mode_present);
+
+    put_bits(&pbc, 1, fh->reduced_tx_set);
+
+    /** global_motion_params() */
+    if (!frame_is_intra) {
+        for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
+            put_bits(&pbc, 1, fh->is_global[i]);
+            if (fh->is_global[i]) {
+                put_bits(&pbc, 1, fh->is_rot_zoom[i]);
+                if (!fh->is_rot_zoom[i])
+                    put_bits(&pbc, 1, fh->is_translation[i]);
+            }
+        }
+    }
+
+trailing_bits:
+    payload_bits = put_bits_count(&pbc) - start;
+
+    /** trailing_bits() */
+    put_bits(&pbc, 1, 1);
+    obu_size = (put_bits_count(&pbc) - start + 7) / 8;
+    for (i = 0; i < obu_size * 8 - payload_bits - 1; i++)
+        put_bits(&pbc, 1, 0);
+
+    flush_put_bits(&pbc);
+    *data_len = put_bits_count(&pbc);
+
+    /** update obu size in bitstream */
+    if (fh_obu->header.obu_has_size_field) {
+        obu_size_len = priv->attr_ext2.bits.obu_size_bytes_minus1 + 1;
+        for (i = 0; i < obu_size_len; i++) {
+            byte = obu_size >> (7 * i) & 0x7f;
+            if (i < obu_size_len - 1)
+                byte |= 0x80;
+            put_bits(&pbc_tmp, 8, byte);
+        }
+        flush_put_bits(&pbc_tmp);
+        memmove(pbc_tmp.buf_ptr, pbc_tmp.buf_ptr + (8 - obu_size_len), obu_size);
+        *data_len -= (8 - obu_size_len) * 8;
+    }
+
+    if (fh->show_existing_frame)
+        return 0;
+
+    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
+        vpic->min_base_qindex = av_clip(avctx->qmin, 1, AV1_MAX_QUANT);
+        vpic->max_base_qindex = av_clip(avctx->qmax, 1, AV1_MAX_QUANT);
+
+        vpic->bit_offset_qindex            = qindex_offset - (8 - obu_size_len) * 8;
+        vpic->bit_offset_loopfilter_params = loopfilter_offset - (8 - obu_size_len) * 8;
+        vpic->bit_offset_cdef_params       = cdef_start_offset - (8 - obu_size_len) * 8;
+        vpic->size_in_bits_cdef_params     = cdef_end_offset - cdef_start_offset;
+        vpic->size_in_bits_frame_hdr_obu   = *data_len;
+
+        vpic->byte_offset_frame_hdr_obu_size = (((pic->type == PICTURE_TYPE_IDR) ?
+                                               priv->sh_data_len / 8 : 0) +
+                                               (fh_obu->header.obu_extension_flag ?
+                                               2 : 1));
+    }
+
+    return 0;
+}
+
+static int tile_log2(int blkSize, int target) {
+    int k;
+    for (k = 0; (blkSize << k) < target; k++);
+    return k;
+}
+
+static int vaapi_encode_av1_set_tile(AVCodecContext *avctx)
+{
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+    int mi_cols, mi_rows, sb_shift, sb_size;
+    int max_tile_area_sb, max_tile_area_sb_varied;
+    int tile_width_sb, tile_height_sb, widest_tile_sb;
+    int min_log2_tiles;
+    int tile_rows_tmp, i;
+
+    if (priv->tile_cols > AV1_MAX_TILE_COLS ||
+        priv->tile_rows > AV1_MAX_TILE_ROWS) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d, should less than %dx%d.\n",
+               priv->tile_cols, priv->tile_rows, AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
+        return AVERROR(EINVAL);
+    }
+
+    mi_cols = 2 * ((avctx->width + 7) >> 3);
+    mi_rows = 2 * ((avctx->height + 7) >> 3);
+    priv->sb_cols = priv->use_128x128_superblock ?
+                    ((mi_cols + 31) >> 5) : ((mi_cols + 15) >> 4);
+    priv->sb_rows = priv->use_128x128_superblock ?
+                    ((mi_rows + 31) >> 5) : ((mi_rows + 15) >> 4);
+    sb_shift = priv->use_128x128_superblock ? 5 : 4;
+    sb_size  = sb_shift + 2;
+    priv->max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
+    max_tile_area_sb = AV1_MAX_TILE_AREA  >> (2 * sb_size);
+
+    priv->min_log2_tile_cols = tile_log2(priv->max_tile_width_sb, priv->sb_cols);
+    priv->max_log2_tile_cols = tile_log2(1, FFMIN(priv->sb_cols, AV1_MAX_TILE_COLS));
+    priv->max_log2_tile_rows = tile_log2(1, FFMIN(priv->sb_rows, AV1_MAX_TILE_ROWS));
+    min_log2_tiles = FFMAX(priv->min_log2_tile_cols,
+                           tile_log2(max_tile_area_sb, priv->sb_rows * priv->sb_cols));
+
+    if (priv->tile_cols != av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols)) {
+        priv->tile_cols = av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols);
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile cols, correct to %d.\n", priv->tile_cols);
+    }
+
+    priv->tile_cols_log2 = tile_log2(1, priv->tile_cols);
+    tile_width_sb = (priv->sb_cols + (1 << priv->tile_cols_log2) - 1) >>
+                    priv->tile_cols_log2;
+
+    if (priv->tile_rows > priv->sb_rows) {
+        priv->tile_rows = priv->sb_rows;
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
+    }
+
+    for (tile_rows_tmp = priv->tile_rows; tile_rows_tmp <= priv->sb_rows && tile_rows_tmp <= AV1_MAX_TILE_ROWS; tile_rows_tmp++) {
+        /** try uniformed tile. */
+        priv->tile_rows_log2 = tile_log2(1, tile_rows_tmp);
+        if ((priv->sb_cols + tile_width_sb - 1) / tile_width_sb == priv->tile_cols) {
+            for (i = 0; i < priv->tile_cols - 1; i++)
+                priv->width_in_sbs_minus_1[i] = tile_width_sb - 1;
+            priv->width_in_sbs_minus_1[i] = priv->sb_cols - (priv->tile_cols - 1) * tile_width_sb - 1;
+
+            tile_height_sb = (priv->sb_rows + (1 << priv->tile_rows_log2) - 1) >>
+                             priv->tile_rows_log2;
+
+            if ((priv->sb_rows + tile_height_sb - 1) / tile_height_sb == tile_rows_tmp &&
+                tile_height_sb <= max_tile_area_sb / tile_width_sb) {
+                for (i = 0; i < tile_rows_tmp - 1; i++)
+                    priv->height_in_sbs_minus_1[i] = tile_height_sb - 1;
+                priv->height_in_sbs_minus_1[i] = priv->sb_rows - (tile_rows_tmp - 1) * tile_height_sb - 1;
+
+                priv->uniform_tile = 1;
+                priv->min_log2_tile_rows = FFMAX(min_log2_tiles - priv->tile_cols_log2, 0);
+
+                break;
+            }
+        }
+
+        /** try non-uniformed tile. */
+        widest_tile_sb = 0;
+        for (i = 0; i < priv->tile_cols; i++) {
+            priv->width_in_sbs_minus_1[i] = (i + 1) * priv->sb_cols / priv->tile_cols - i * priv->sb_cols / priv->tile_cols - 1;
+            widest_tile_sb = FFMAX(widest_tile_sb, priv->width_in_sbs_minus_1[i] + 1);
+        }
+
+        if (min_log2_tiles)
+            max_tile_area_sb_varied = (priv->sb_rows * priv->sb_cols) >> (min_log2_tiles + 1);
+        else
+            max_tile_area_sb_varied = priv->sb_rows * priv->sb_cols;
+        priv->max_tile_height_sb = FFMAX(1, max_tile_area_sb_varied / widest_tile_sb);
+
+        if (tile_rows_tmp == av_clip(tile_rows_tmp, (priv->sb_rows + priv->max_tile_height_sb - 1) / priv->max_tile_height_sb, priv->sb_rows)) {
+            for (i = 0; i < tile_rows_tmp; i++)
+                priv->height_in_sbs_minus_1[i] = (i + 1) * priv->sb_rows / tile_rows_tmp - i * priv->sb_rows / tile_rows_tmp - 1;
+
+            break;
+        }
+    }
+
+    if (priv->tile_rows != tile_rows_tmp) {
+        priv->tile_rows = tile_rows_tmp;
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
+    }
+
+    /** check if tile cols/rows is supported by driver. */
+    if (priv->attr_ext2.bits.max_tile_num_minus1) {
+        if ((priv->tile_cols * priv->tile_rows - 1) > priv->attr_ext2.bits.max_tile_num_minus1) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported tile num %d * %d = %d by driver, "
+                   "should be less than %d.\n", priv->tile_cols, priv->tile_rows,
+                   priv->tile_cols * priv->tile_rows,
+                   priv->attr_ext2.bits.max_tile_num_minus1 + 1);
+            return AVERROR(EINVAL);
+        }
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to %d/%d.\n",
+           priv->tile_cols, priv->tile_rows);
+
+    /** check if tile group numbers is valid. */
+    if (priv->tile_groups > priv->tile_cols * priv->tile_rows) {
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile groups number %d, "
+        "correct to %d.\n", priv->tile_groups, priv->tile_cols * priv->tile_rows);
+        priv->tile_groups = priv->tile_cols * priv->tile_rows;
+    }
+
+    return 0;
+}
+
+static int vaapi_encode_av1_write_sequence_header(AVCodecContext *avctx,
+                                                  char *data, size_t *data_len)
+{
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+
+    memcpy(data, &priv->sh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
+    *data_len = priv->sh_data_len;
+
+    return 0;
+}
+
+static int vaapi_encode_av1_init_sequence_params(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext               *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context           *priv = avctx->priv_data;
+    AV1RawOBU                     *sh_obu = &priv->sh;
+    AV1RawSequenceHeader              *sh = &sh_obu->obu.sequence_header;
+    VAEncSequenceParameterBufferAV1 *vseq = ctx->codec_sequence_params;
+    CodedBitstreamFragment           *obu = &priv->current_obu;
+    const AVPixFmtDescriptor *desc;
+    int ret;
+
+    memset(sh_obu, 0, sizeof(*sh_obu));
+    sh_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER;
+
+    desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
+    av_assert0(desc);
+
+    sh->seq_profile  = avctx->profile;
+    if (!sh->seq_force_screen_content_tools)
+        sh->seq_force_integer_mv = AV1_SELECT_INTEGER_MV;
+    sh->frame_width_bits_minus_1  = av_log2(avctx->width);
+    sh->frame_height_bits_minus_1 = av_log2(avctx->height);
+    sh->max_frame_width_minus_1   = avctx->width - 1;
+    sh->max_frame_height_minus_1  = avctx->height - 1;
+    sh->seq_tier[0]               = priv->tier;
+    /** enable order hint and reserve maximum 8 bits for it by default. */
+    sh->enable_order_hint         = 1;
+    sh->order_hint_bits_minus_1   = 7;
+
+    sh->color_config = (AV1RawColorConfig) {
+        .high_bitdepth                  = desc->comp[0].depth == 8 ? 0 : 1,
+        .color_primaries                = avctx->color_primaries,
+        .transfer_characteristics       = avctx->color_trc,
+        .matrix_coefficients            = avctx->colorspace,
+        .color_description_present_flag = (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+                                           avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
+                                           avctx->colorspace      != AVCOL_SPC_UNSPECIFIED),
+        .color_range                    = avctx->color_range == AVCOL_RANGE_JPEG,
+        .subsampling_x                  = desc->log2_chroma_w,
+        .subsampling_y                  = desc->log2_chroma_h,
+    };
+
+    if (avctx->level != FF_LEVEL_UNKNOWN) {
+        sh->seq_level_idx[0] = avctx->level;
+    } else {
+        const AV1LevelDescriptor *level;
+        float framerate;
+
+        if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
+            framerate = avctx->framerate.num / avctx->framerate.den;
+        else
+            framerate = 0;
+
+        level = ff_av1_guess_level(avctx->bit_rate, priv->tier,
+                                   ctx->surface_width, ctx->surface_height,
+                                   priv->tile_rows * priv->tile_cols,
+                                   priv->tile_cols, framerate);
+        if (level) {
+            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
+            sh->seq_level_idx[0] = level->level_idx;
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to "
+                   "any normal level, using level 6.3 by default.\n");
+            sh->seq_level_idx[0] = 19;
+            sh->seq_tier[0] = 1;
+        }
+    }
+    vseq->seq_profile             = sh->seq_profile;
+    vseq->seq_level_idx           = sh->seq_level_idx[0];
+    vseq->seq_tier                = sh->seq_tier[0];
+    vseq->order_hint_bits_minus_1 = sh->order_hint_bits_minus_1;
+    vseq->intra_period            = ctx->gop_size;
+    vseq->ip_period               = ctx->b_per_p + 1;
+
+    vseq->seq_fields.bits.enable_order_hint = sh->enable_order_hint;
+
+    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
+        vseq->bits_per_second = ctx->va_bit_rate;
+        vseq->seq_fields.bits.enable_cdef = sh->enable_cdef = 1;
+    }
+
+    ret = vaapi_encode_av1_add_obu(avctx, obu, AV1_OBU_SEQUENCE_HEADER, &priv->sh);
+    if (ret < 0)
+        goto end;
+
+    ret = vaapi_encode_av1_write_obu(avctx, priv->sh_data, &priv->sh_data_len, obu);
+    if (ret < 0)
+        goto end;
+
+end:
+    ff_cbs_fragment_reset(obu);
+    return ret;
+}
+
+static int vaapi_encode_av1_init_picture_params(AVCodecContext *avctx,
+                                                VAAPIEncodePicture *pic)
+{
+    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
+    VAAPIEncodeAV1Picture          *hpic = pic->priv_data;
+    AV1RawOBU                    *fh_obu = &priv->fh;
+    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
+    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
+    CodedBitstreamFragment          *obu = &priv->current_obu;
+    VAAPIEncodePicture    *ref;
+    VAAPIEncodeAV1Picture *href;
+    int slot, i;
+    int ret;
+    static const int8_t default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
+        { 1, 0, 0, 0, -1, 0, -1, -1 };
+
+    memset(fh_obu, 0, sizeof(*fh_obu));
+    pic->nb_slices = priv->tile_groups;
+    fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
+    fh_obu->header.obu_has_size_field = 1;
+
+    switch (pic->type) {
+    case PICTURE_TYPE_IDR:
+        av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]);
+        fh->frame_type = AV1_FRAME_KEY;
+        fh->refresh_frame_flags = 0xFF;
+        fh->base_q_idx = priv->q_idx_idr;
+        hpic->slot = 0;
+        hpic->last_idr_frame = pic->display_order;
+        break;
+    case PICTURE_TYPE_P:
+        av_assert0(pic->nb_refs[0]);
+        fh->frame_type = AV1_FRAME_INTER;
+        fh->base_q_idx = priv->q_idx_p;
+        ref = pic->refs[0][pic->nb_refs[0] - 1];
+        href = ref->priv_data;
+        hpic->slot = !href->slot;
+        hpic->last_idr_frame = href->last_idr_frame;
+        fh->refresh_frame_flags = 1 << hpic->slot;
+
+        /** set the nearest frame in L0 as all reference frame. */
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            fh->ref_frame_idx[i] = href->slot;
+        }
+        fh->primary_ref_frame = href->slot;
+        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
+
+        /** set the 2nd nearest frame in L0 as Golden frame. */
+        if (pic->nb_refs[0] > 1) {
+            ref = pic->refs[0][pic->nb_refs[0] - 2];
+            href = ref->priv_data;
+            fh->ref_frame_idx[3] = href->slot;
+            fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+            vpic->ref_frame_ctrl_l0.fields.search_idx1 = AV1_REF_FRAME_GOLDEN;
+        }
+        break;
+    case PICTURE_TYPE_B:
+        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
+        fh->frame_type = AV1_FRAME_INTER;
+        fh->base_q_idx = priv->q_idx_b;
+        fh->refresh_frame_flags = 0x0;
+        fh->reference_select = 1;
+
+        /** B frame will not be referenced, disable its recon frame. */
+        vpic->picture_flags.bits.disable_frame_recon = 1;
+
+        /** Use LAST_FRAME and BWDREF_FRAME for reference. */
+        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
+        vpic->ref_frame_ctrl_l1.fields.search_idx0 = AV1_REF_FRAME_BWDREF;
+
+        ref                            = pic->refs[0][pic->nb_refs[0] - 1];
+        href                           = ref->priv_data;
+        hpic->last_idr_frame           = href->last_idr_frame;
+        fh->primary_ref_frame          = href->slot;
+        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+        for (i = 0; i < AV1_REF_FRAME_GOLDEN; i++) {
+            fh->ref_frame_idx[i] = href->slot;
+        }
+
+        ref                            = pic->refs[1][pic->nb_refs[1] - 1];
+        href                           = ref->priv_data;
+        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+        for (i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME; i++) {
+            fh->ref_frame_idx[i] = href->slot;
+        }
+        break;
+    default:
+        av_assert0(0 && "invalid picture type");
+    }
+
+    fh->show_frame                = pic->display_order <= pic->encode_order;
+    fh->showable_frame            = fh->frame_type != AV1_FRAME_KEY;
+    fh->frame_width_minus_1       = avctx->width - 1;
+    fh->frame_height_minus_1      = avctx->height - 1;
+    fh->render_width_minus_1      = fh->frame_width_minus_1;
+    fh->render_height_minus_1     = fh->frame_height_minus_1;
+    fh->order_hint                = pic->display_order - hpic->last_idr_frame;
+    fh->tile_cols                 = priv->tile_cols;
+    fh->tile_rows                 = priv->tile_rows;
+    fh->tile_cols_log2            = priv->tile_cols_log2;
+    fh->tile_rows_log2            = priv->tile_rows_log2;
+    fh->uniform_tile_spacing_flag = priv->uniform_tile;
+    fh->tile_size_bytes_minus1    = priv->attr_ext2.bits.tile_size_bytes_minus1;
+    fh->reduced_tx_set            = 1;
+
+    /** ignore ONLY_4x4 mode for codedlossless is not fully implemented. */
+    if (priv->attr_ext2.bits.tx_mode_support & 0x04)
+        fh->tx_mode = AV1_TX_MODE_SELECT;
+    else if (priv->attr_ext2.bits.tx_mode_support & 0x02)
+        fh->tx_mode = AV1_TX_MODE_LARGEST;
+    else
+        return AVERROR(EINVAL);
+
+    for (i = 0; i < fh->tile_cols; i++)
+        fh->width_in_sbs_minus_1[i] = vpic->width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i];
+
+    for (i = 0; i < fh->tile_rows; i++)
+        fh->height_in_sbs_minus_1[i] = vpic->height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i];
+
+    memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
+           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
+
+    if (fh->frame_type == AV1_FRAME_KEY && fh->show_frame) {
+        fh->error_resilient_mode = 1;
+    }
+
+    if (fh->frame_type == AV1_FRAME_KEY || fh->error_resilient_mode)
+        fh->primary_ref_frame = AV1_PRIMARY_REF_NONE;
+
+    vpic->base_qindex          = fh->base_q_idx;
+    vpic->frame_width_minus_1  = fh->frame_width_minus_1;
+    vpic->frame_height_minus_1 = fh->frame_height_minus_1;
+    vpic->primary_ref_frame    = fh->primary_ref_frame;
+    vpic->reconstructed_frame  = pic->recon_surface;
+    vpic->coded_buf            = pic->output_buffer;
+    vpic->tile_cols            = fh->tile_cols;
+    vpic->tile_rows            = fh->tile_rows;
+    vpic->order_hint           = fh->order_hint;
+#if VA_CHECK_VERSION(1, 15, 0)
+    vpic->refresh_frame_flags  = fh->refresh_frame_flags;
+#endif
+
+    vpic->picture_flags.bits.enable_frame_obu     = 0;
+    vpic->picture_flags.bits.frame_type           = fh->frame_type;
+    vpic->picture_flags.bits.reduced_tx_set       = fh->reduced_tx_set;
+    vpic->picture_flags.bits.error_resilient_mode = fh->error_resilient_mode;
+
+    /** let driver decide to use single or compound reference prediction mode. */
+    vpic->mode_control_flags.bits.reference_mode = fh->reference_select ? 2 : 0;
+    vpic->mode_control_flags.bits.tx_mode = fh->tx_mode;
+
+    vpic->tile_group_obu_hdr_info.bits.obu_has_size_field = 1;
+
+    /** set reference. */
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
+        vpic->ref_frame_idx[i] = fh->ref_frame_idx[i];
+
+    for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
+        vpic->reference_frames[i] = VA_INVALID_SURFACE;
+
+    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+        for (int j = 0; j < pic->nb_refs[i]; j++) {
+            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
+
+            slot = ((VAAPIEncodeAV1Picture*)ref_pic->priv_data)->slot;
+            av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
+
+            vpic->reference_frames[slot] = ref_pic->recon_surface;
+        }
+    }
+
+    /** pack frame header, and set va params offset like bit_offset_qindex etc. */
+    ret = vaapi_encode_av1_write_frame_header(avctx, pic, priv->fh_data, &priv->fh_data_len);
+    if (ret < 0)
+        goto end;
+
+end:
+    ff_cbs_fragment_reset(obu);
+    return ret;
+}
+
+static int vaapi_encode_av1_init_slice_params(AVCodecContext *avctx,
+                                              VAAPIEncodePicture *pic,
+                                              VAAPIEncodeSlice *slice)
+{
+    VAAPIEncodeAV1Context      *priv = avctx->priv_data;
+    VAEncTileGroupBufferAV1  *vslice = slice->codec_slice_params;
+    CodedBitstreamAV1Context  *cbctx = priv->cbc->priv_data;
+    int div;
+
+    /** Set tile group info. */
+    div = priv->tile_cols * priv->tile_rows / priv->tile_groups;
+    vslice->tg_start = slice->index * div;
+    if (slice->index == (priv->tile_groups - 1)) {
+        vslice->tg_end = priv->tile_cols * priv->tile_rows - 1;
+        cbctx->seen_frame_header = 0;
+    } else {
+        vslice->tg_end = (slice->index + 1) * div - 1;
+    }
+
+    return 0;
+}
+
+static int vaapi_encode_av1_write_picture_header(AVCodecContext *avctx,
+                                                 VAAPIEncodePicture *pic,
+                                                 char *data, size_t *data_len)
+{
+    VAAPIEncodeAV1Context     *priv = avctx->priv_data;
+    CodedBitstreamFragment     *obu = &priv->current_obu;
+    AV1RawOBU               *fh_obu = &priv->fh;
+    AV1RawFrameHeader       *rep_fh = &fh_obu->obu.frame_header;
+    VAAPIEncodeAV1Picture *href;
+    int ret = 0;
+
+    pic->tail_size = 0;
+    /** Pack repeat frame header. */
+    if (pic->display_order > pic->encode_order) {
+        memset(fh_obu, 0, sizeof(*fh_obu));
+        href = pic->refs[0][pic->nb_refs[0] - 1]->priv_data;
+        fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
+        fh_obu->header.obu_has_size_field = 1;
+
+        rep_fh->show_existing_frame   = 1;
+        rep_fh->frame_to_show_map_idx = href->slot == 0;
+        rep_fh->frame_type            = AV1_FRAME_INTER;
+        rep_fh->frame_width_minus_1   = avctx->width - 1;
+        rep_fh->frame_height_minus_1  = avctx->height - 1;
+        rep_fh->render_width_minus_1  = rep_fh->frame_width_minus_1;
+        rep_fh->render_height_minus_1 = rep_fh->frame_height_minus_1;
+
+        ret = vaapi_encode_av1_write_frame_header(avctx, pic, pic->tail_data, &pic->tail_size);
+        if (ret < 0)
+            goto end;
+
+        pic->tail_size /= 8;
+    }
+
+    memcpy(data, &priv->fh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
+    *data_len = priv->fh_data_len;
+
+end:
+    ff_cbs_fragment_reset(obu);
+    return ret;
+}
+
+static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
+    { FF_PROFILE_AV1_MAIN,  8, 3, 1, 1, VAProfileAV1Profile0 },
+    { FF_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
+    { FF_PROFILE_UNKNOWN }
+};
+
+static const VAAPIEncodeType vaapi_encode_type_av1 = {
+    .profiles        = vaapi_encode_av1_profiles,
+    .flags           = FLAG_B_PICTURES,
+    .default_quality = 25,
+    .configure       = &vaapi_encode_av1_configure,
+
+    .sequence_header_type  = VAEncPackedHeaderSequence,
+    .sequence_params_size  = sizeof(VAEncSequenceParameterBufferAV1),
+    .init_sequence_params  = &vaapi_encode_av1_init_sequence_params,
+    .write_sequence_header = &vaapi_encode_av1_write_sequence_header,
+
+    .picture_priv_data_size = sizeof(VAAPIEncodeAV1Picture),
+    .picture_header_type    = VAEncPackedHeaderPicture,
+    .picture_params_size    = sizeof(VAEncPictureParameterBufferAV1),
+    .init_picture_params    = &vaapi_encode_av1_init_picture_params,
+    .write_picture_header   = &vaapi_encode_av1_write_picture_header,
+
+    .slice_params_size = sizeof(VAEncTileGroupBufferAV1),
+    .init_slice_params = &vaapi_encode_av1_init_slice_params,
+};
+
+static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext      *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context  *priv = avctx->priv_data;
+    VAConfigAttrib attr;
+    VAStatus vas;
+    int ret;
+
+    ctx->codec = &vaapi_encode_type_av1;
+
+    ctx->desired_packed_headers =
+        VA_ENC_PACKED_HEADER_SEQUENCE |
+        VA_ENC_PACKED_HEADER_PICTURE;
+
+    if (avctx->profile == FF_PROFILE_UNKNOWN)
+        avctx->profile = priv->profile;
+    if (avctx->level == FF_LEVEL_UNKNOWN)
+        avctx->level = priv->level;
+
+    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0x1f) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid level %d\n", avctx->level);
+        return AVERROR(EINVAL);
+    }
+
+    ret = ff_vaapi_encode_init(avctx);
+    if (ret < 0)
+        return ret;
+
+    attr.type = VAConfigAttribEncAV1;
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query "
+               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        priv->attr.value = 0;
+        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
+               "supported.\n", attr.type);
+    } else {
+        priv->attr.value = attr.value;
+    }
+
+    attr.type = VAConfigAttribEncAV1Ext1;
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query "
+               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        priv->attr_ext1.value = 0;
+        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
+               "supported.\n", attr.type);
+    } else {
+        priv->attr_ext1.value = attr.value;
+    }
+
+    /** This attr provides essential indicators, return error if not support. */
+    attr.type = VAConfigAttribEncAV1Ext2;
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS || attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query "
+               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    } else {
+        priv->attr_ext2.value = attr.value;
+    }
+
+    ret = vaapi_encode_av1_set_tile(avctx);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static av_cold int vaapi_encode_av1_close(AVCodecContext *avctx)
+{
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+
+    ff_cbs_fragment_free(&priv->current_obu);
+    ff_cbs_close(&priv->cbc);
+
+    return ff_vaapi_encode_close(avctx);
+}
+
+#define OFFSET(x) offsetof(VAAPIEncodeAV1Context, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+
+static const AVOption vaapi_encode_av1_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
+    VAAPI_ENCODE_RC_OPTIONS,
+    { "profile", "Set profile (seq_profile)",
+      OFFSET(profile), AV_OPT_TYPE_INT,
+      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff, FLAGS, "profile" },
+
+#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
+    { .i64 = value }, 0, 0, FLAGS, "profile"
+    { PROFILE("main",               FF_PROFILE_AV1_MAIN) },
+    { PROFILE("high",               FF_PROFILE_AV1_HIGH) },
+    { PROFILE("professional",       FF_PROFILE_AV1_PROFESSIONAL) },
+#undef PROFILE
+
+    { "tier", "Set tier (seq_tier)",
+      OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "tier" },
+    { "main", NULL, 0, AV_OPT_TYPE_CONST,
+      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
+    { "high", NULL, 0, AV_OPT_TYPE_CONST,
+      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
+    { "level", "Set level (seq_level_idx)",
+      OFFSET(level), AV_OPT_TYPE_INT,
+      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0x1f, FLAGS, "level" },
+
+#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "level"
+    { LEVEL("2.0",  0) },
+    { LEVEL("2.1",  1) },
+    { LEVEL("3.0",  4) },
+    { LEVEL("3.1",  5) },
+    { LEVEL("4.0",  8) },
+    { LEVEL("4.1",  9) },
+    { LEVEL("5.0", 12) },
+    { LEVEL("5.1", 13) },
+    { LEVEL("5.2", 14) },
+    { LEVEL("5.3", 15) },
+    { LEVEL("6.0", 16) },
+    { LEVEL("6.1", 17) },
+    { LEVEL("6.2", 18) },
+    { LEVEL("6.3", 19) },
+#undef LEVEL
+
+    { "tiles", "Tile columns x rows",
+      OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1x1" }, 1, AV1_MAX_TILE_COLS, FLAGS },
+    { "tile_groups", "Number of tile groups for encoding",
+      OFFSET(tile_groups), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, AV1_MAX_TILE_ROWS * AV1_MAX_TILE_COLS, FLAGS },
+
+    { NULL },
+};
+
+static const FFCodecDefault vaapi_encode_av1_defaults[] = {
+    { "b",              "0"   },
+    { "bf",             "7"   },
+    { "g",              "120" },
+    { "qmin",           "1"   },
+    { "qmax",           "255" },
+    { NULL },
+};
+
+static const AVClass vaapi_encode_av1_class = {
+    .class_name = "av1_vaapi",
+    .item_name  = av_default_item_name,
+    .option     = vaapi_encode_av1_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const FFCodec ff_av1_vaapi_encoder = {
+    .p.name         = "av1_vaapi",
+    CODEC_LONG_NAME("AV1 (VAAPI)"),
+    .p.type         = AVMEDIA_TYPE_VIDEO,
+    .p.id           = AV_CODEC_ID_AV1,
+    .priv_data_size = sizeof(VAAPIEncodeAV1Context),
+    .init           = &vaapi_encode_av1_init,
+    FF_CODEC_RECEIVE_PACKET_CB(&ff_vaapi_encode_receive_packet),
+    .close          = &vaapi_encode_av1_close,
+    .p.priv_class   = &vaapi_encode_av1_class,
+    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE |
+                      AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
+    .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
+    .defaults       = vaapi_encode_av1_defaults,
+    .p.pix_fmts = (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_VAAPI,
+        AV_PIX_FMT_NONE,
+    },
+    .hw_configs     = ff_vaapi_encode_hw_configs,
+    .p.wrapper_name = "vaapi",
+};