diff mbox series

[FFmpeg-devel,v1,6/6] lavc/vaapi_encode: Add VAAPI AV1 encoder

Message ID 20230710073941.425658-6-fei.w.wang@intel.com
State New
Headers show
Series [FFmpeg-devel,v1,1/6] avcodec/cbs_av1: Add tx mode enum values | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Wang, Fei W July 10, 2023, 7:39 a.m. UTC
Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
 Changelog                     |    1 +
 configure                     |    3 +
 doc/encoders.texi             |   13 +
 libavcodec/Makefile           |    1 +
 libavcodec/allcodecs.c        |    1 +
 libavcodec/vaapi_encode.c     |  125 +++-
 libavcodec/vaapi_encode.h     |   12 +
 libavcodec/vaapi_encode_av1.c | 1228 +++++++++++++++++++++++++++++++++
 8 files changed, 1366 insertions(+), 18 deletions(-)
 create mode 100644 libavcodec/vaapi_encode_av1.c

Comments

David Rosca July 17, 2023, 11:12 a.m. UTC | #1
On Mon, Jul 10, 2023 at 9:40 AM Fei Wang
<fei.w.wang-at-intel.com@ffmpeg.org> wrote:
>
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
>  Changelog                     |    1 +
>  configure                     |    3 +
>  doc/encoders.texi             |   13 +
>  libavcodec/Makefile           |    1 +
>  libavcodec/allcodecs.c        |    1 +
>  libavcodec/vaapi_encode.c     |  125 +++-
>  libavcodec/vaapi_encode.h     |   12 +
>  libavcodec/vaapi_encode_av1.c | 1228 +++++++++++++++++++++++++++++++++
>  8 files changed, 1366 insertions(+), 18 deletions(-)
>  create mode 100644 libavcodec/vaapi_encode_av1.c
>
> diff --git a/Changelog b/Changelog
> index 3876082844..7ae9b85d52 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -25,6 +25,7 @@ version <next>:
>  - Raw VVC bitstream parser, muxer and demuxer
>  - Bitstream filter for editing metadata in VVC streams
>  - Bitstream filter for converting VVC from MP4 to Annex B
> +- VAAPI AV1 encoder
>
>  version 6.0:
>  - Radiance HDR image support
> diff --git a/configure b/configure
> index 0ab0761011..6a1a30aaec 100755
> --- a/configure
> +++ b/configure
> @@ -3323,6 +3323,8 @@ av1_qsv_decoder_select="qsvdec"
>  av1_qsv_encoder_select="qsvenc"
>  av1_qsv_encoder_deps="libvpl"
>  av1_amf_encoder_deps="amf"
> +av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1"
> +av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
>
>  # parsers
>  aac_parser_select="adts_header mpeg4audio"
> @@ -7106,6 +7108,7 @@ if enabled vaapi; then
>      check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
>      check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
>      check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
> +    check_type "va/va.h va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
>  fi
>
>  if enabled_all opencl libdrm ; then
> diff --git a/doc/encoders.texi b/doc/encoders.texi
> index 25d6b7f09e..fb331ebd8e 100644
> --- a/doc/encoders.texi
> +++ b/doc/encoders.texi
> @@ -3991,6 +3991,19 @@ Average variable bitrate.
>  Each encoder also has its own specific options:
>  @table @option
>
> +@item av1_vaapi
> +@option{profile} sets the value of @emph{seq_profile}.
> +@option{tier} sets the value of @emph{seq_tier}.
> +@option{level} sets the value of @emph{seq_level_idx}.
> +
> +@table @option
> +@item tiles
> +Set the number of tiles to encode the input video with, as columns x rows.
> +(default is 1x1).
> +@item tile_groups
> +Set the number of tile groups (default is 1).
> +@end table
> +
>  @item h264_vaapi
>  @option{profile} sets the value of @emph{profile_idc} and the @emph{constraint_set*_flag}s.
>  @option{level} sets the value of @emph{level_idc}.
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3cd5997e64..fe1e6aa99d 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -259,6 +259,7 @@ OBJS-$(CONFIG_AV1_MEDIACODEC_DECODER)  += mediacodecdec.o
>  OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER)  += mediacodecenc.o
>  OBJS-$(CONFIG_AV1_NVENC_ENCODER)       += nvenc_av1.o nvenc.o
>  OBJS-$(CONFIG_AV1_QSV_ENCODER)         += qsvenc_av1.o
> +OBJS-$(CONFIG_AV1_VAAPI_ENCODER)       += vaapi_encode_av1.o av1_profile_level.o
>  OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o
>  OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
>  OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 8775d15a4f..c43c1d7b48 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -844,6 +844,7 @@ extern const FFCodec ff_av1_nvenc_encoder;
>  extern const FFCodec ff_av1_qsv_decoder;
>  extern const FFCodec ff_av1_qsv_encoder;
>  extern const FFCodec ff_av1_amf_encoder;
> +extern const FFCodec ff_av1_vaapi_encoder;
>  extern const FFCodec ff_libopenh264_encoder;
>  extern const FFCodec ff_libopenh264_decoder;
>  extern const FFCodec ff_h264_amf_encoder;
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index 2604f12b9e..2907e159fb 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -669,6 +669,15 @@ static int vaapi_encode_set_output_timestamp(AVCodecContext *avctx,
>  {
>      VAAPIEncodeContext *ctx = avctx->priv_data;
>
> +    // AV1 packs P frame and next B frame into one pkt, and uses the other
> +    // repeat frame header pkt at the display order position of the P frame
> +    // to indicate its frame index. Each frame has a corresponding pkt in its
> +    // display order position, so the output delay need not be considered for AV1 timestamps.
> +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> +        pkt->dts = pkt->pts - ctx->dts_pts_diff;
> +        return 0;
> +    }
> +
>      if (ctx->output_delay == 0) {
>          pkt->dts = pkt->pts;
>      } else if (pic->encode_order < ctx->decode_delay) {
> @@ -689,9 +698,10 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>  {
>      VAAPIEncodeContext *ctx = avctx->priv_data;
>      VACodedBufferSegment *buf_list, *buf;
> -    VAStatus vas;
> +    AVPacket *pkt_ptr = pkt;
>      int total_size = 0;
>      uint8_t *ptr;
> +    VAStatus vas;
>      int err;
>
>      err = vaapi_encode_wait(avctx, pic);
> @@ -711,11 +721,52 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>      for (buf = buf_list; buf; buf = buf->next)
>          total_size += buf->size;
>
> -    err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> -    ptr = pkt->data;
> +    /** Repack the AV1 coded frame for non-displayed and repeat frames. */
> +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> +        int display_frame = pic->display_order <= pic->encode_order;
>
> -    if (err < 0)
> -        goto fail_mapped;
> +        if (display_frame) {
> +            total_size += ctx->header_data_size;
> +            err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> +            if (err < 0)
> +                goto fail_mapped;
> +            ptr = pkt->data;
> +
> +            if (ctx->header_data_size) {
> +                memcpy(ptr, ctx->header_data, ctx->header_data_size);
> +                ptr += ctx->header_data_size;
> +                ctx->header_data_size = 0;
> +            }
> +        } else {
> +            ctx->header_data = av_realloc(ctx->header_data, total_size);
> +            if (!ctx->header_data) {
> +                err = AVERROR(ENOMEM);
> +                goto fail_mapped;
> +            }
> +            ptr = ctx->header_data;
> +            ctx->header_data_size = total_size;
> +
> +            if (pic->tail_size) {
> +                if (ctx->tail_pkt->size) {
> +                    err = AVERROR(AVERROR_BUG);
> +                    goto fail_mapped;
> +                }
> +
> +                err = ff_get_encode_buffer(avctx, ctx->tail_pkt, pic->tail_size, 0);
> +                if (err < 0)
> +                    goto fail_mapped;
> +
> +                memcpy(ctx->tail_pkt->data, pic->tail_data, pic->tail_size);
> +                pkt_ptr = ctx->tail_pkt;
> +            }
> +        }
> +    } else {
> +        err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> +        ptr = pkt->data;
> +
> +        if (err < 0)
> +            goto fail_mapped;
> +    }
>
>      for (buf = buf_list; buf; buf = buf->next) {
>          av_log(avctx, AV_LOG_DEBUG, "Output buffer: %u bytes "
> @@ -726,10 +777,10 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>      }
>
>      if (pic->type == PICTURE_TYPE_IDR)
> -        pkt->flags |= AV_PKT_FLAG_KEY;
> +        pkt_ptr->flags |= AV_PKT_FLAG_KEY;
>
> -    pkt->pts = pic->pts;
> -    pkt->duration = pic->duration;
> +    pkt_ptr->pts = pic->pts;
> +    pkt_ptr->duration = pic->duration;
>
>      vas = vaUnmapBuffer(ctx->hwctx->display, pic->output_buffer);
>      if (vas != VA_STATUS_SUCCESS) {
> @@ -742,8 +793,8 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>      // for no-delay encoders this is handled in generic codec
>      if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
>          avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
> -        pkt->opaque     = pic->opaque;
> -        pkt->opaque_ref = pic->opaque_ref;
> +        pkt_ptr->opaque     = pic->opaque;
> +        pkt_ptr->opaque_ref = pic->opaque_ref;
>          pic->opaque_ref = NULL;
>      }
>
> @@ -752,6 +803,9 @@ static int vaapi_encode_output(AVCodecContext *avctx,
>
>      av_log(avctx, AV_LOG_DEBUG, "Output read for pic %"PRId64"/%"PRId64".\n",
>             pic->display_order, pic->encode_order);
> +
> +    vaapi_encode_set_output_timestamp(avctx, pic, pkt_ptr);
> +
>      return 0;
>
>  fail_mapped:
> @@ -1128,9 +1182,19 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
>
>      vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
>      if (pic->type != PICTURE_TYPE_IDR) {
> -        vaapi_encode_add_ref(avctx, pic, start,
> -                             pic->type == PICTURE_TYPE_P,
> -                             b_counter > 0, 0);
> +        // TODO: apply both previous and forward multi reference for all vaapi encoders.
> +        // And L0/L1 reference frame number can be set dynamically through query
> +        // VAConfigAttribEncMaxRefFrames attribute.
> +        if (avctx->codec_id == AV_CODEC_ID_AV1) {
> +            for (i = 0; i < ctx->nb_next_prev; i++)
> +                vaapi_encode_add_ref(avctx, pic, ctx->next_prev[i],
> +                                     pic->type == PICTURE_TYPE_P,
> +                                     b_counter > 0, 0);
> +        } else
> +            vaapi_encode_add_ref(avctx, pic, start,
> +                                 pic->type == PICTURE_TYPE_P,
> +                                 b_counter > 0, 0);
> +
>          vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx->nb_next_prev - 1], 0, 0, 1);
>      }
>
> @@ -1292,6 +1356,19 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>      AVFrame *frame = ctx->frame;
>      int err;
>
> +start:
> +    /** If no B frame precedes the repeat P frame, send the repeat P frame out. */
> +    if (avctx->codec_id == AV_CODEC_ID_AV1 && ctx->tail_pkt->size) {
> +        for (VAAPIEncodePicture *tmp = ctx->pic_start; tmp; tmp = tmp->next) {
> +            if (tmp->type == PICTURE_TYPE_B && tmp->pts < ctx->tail_pkt->pts)
> +                break;
> +            else if (!tmp->next) {
> +                av_packet_move_ref(pkt, ctx->tail_pkt);
> +                goto end;
> +            }
> +        }
> +    }
> +
>      err = ff_encode_get_frame(avctx, frame);
>      if (err < 0 && err != AVERROR_EOF)
>          return err;
> @@ -1356,17 +1433,21 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>          return err;
>      }
>
> -    vaapi_encode_set_output_timestamp(avctx, pic, pkt);
> -    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
> -           "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> -
>      ctx->output_order = pic->encode_order;
>      vaapi_encode_clear_old(avctx);
>
> +    /** loop to get an available pkt in encoder flushing. */
> +    if (ctx->end_of_stream && !pkt->size)
> +        goto start;
> +
> +end:
> +    if (pkt->size)
> +        av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
> +               "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> +
>      return 0;
>  }
>
> -
>  static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx, int type,
>                                                    void *buffer, size_t size)
>  {
> @@ -2667,6 +2748,12 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
>      ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
>      ctx->hwctx = ctx->device->hwctx;
>
> +    ctx->tail_pkt = av_packet_alloc();
> +    if (!ctx->tail_pkt) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
>      err = vaapi_encode_profile_entrypoint(avctx);
>      if (err < 0)
>          goto fail;
> @@ -2859,9 +2946,11 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
>      }
>
>      av_frame_free(&ctx->frame);
> +    av_packet_free(&ctx->tail_pkt);
>
>      av_freep(&ctx->codec_sequence_params);
>      av_freep(&ctx->codec_picture_params);
> +    av_freep(&ctx->header_data);
>      av_fifo_freep2(&ctx->encode_fifo);
>
>      av_buffer_unref(&ctx->recon_frames_ref);
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index d5452a37b3..03df8d6d46 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -133,6 +133,11 @@ typedef struct VAAPIEncodePicture {
>
>      int          nb_slices;
>      VAAPIEncodeSlice *slices;
> +
> +    /** Tail data of current pic, used only for repeat header of AV1. */
> +    char tail_data[MAX_PARAM_BUFFER_SIZE];
> +    /** Byte length of tail_data. */
> +    size_t tail_size;
>  } VAAPIEncodePicture;
>
>  typedef struct VAAPIEncodeProfile {
> @@ -367,6 +372,13 @@ typedef struct VAAPIEncodeContext {
>      AVFifo          *encode_fifo;
>      // Max number of frame buffered in encoder.
>      int             async_depth;
> +
> +    /** Head data for current output pkt, used only for AV1. */
> +    void  *header_data;
> +    size_t header_data_size;
> +
> +    /** Store av1 repeat frame header pkt. */
> +    AVPacket *tail_pkt;
>  } VAAPIEncodeContext;
>
>  enum {
> diff --git a/libavcodec/vaapi_encode_av1.c b/libavcodec/vaapi_encode_av1.c
> new file mode 100644
> index 0000000000..1635b25bb7
> --- /dev/null
> +++ b/libavcodec/vaapi_encode_av1.c
> @@ -0,0 +1,1228 @@
> +/*
> + * Copyright (c) 2023 Intel Corporation
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <va/va.h>
> +#include <va/va_enc_av1.h>
> +
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/opt.h"
> +
> +#include "cbs_av1.h"
> +#include "put_bits.h"
> +#include "codec_internal.h"
> +#include "av1_profile_level.h"
> +#include "vaapi_encode.h"
> +
> +#define AV1_MAX_QUANT 255
> +/** NOTE(review): presumably the per-picture private data of the AV1 encoder; neither field is used in the visible part of this patch - confirm. */
> +typedef struct VAAPIEncodeAV1Picture {
> +    int64_t last_idr_frame; /**< presumably identifies the most recent IDR frame - TODO confirm against its users. */
> +    int slot; /**< presumably a reference slot index - TODO confirm against its users. */
> +} VAAPIEncodeAV1Picture;
> +/** Private context of the AV1 VAAPI encoder: common VAAPI encode state plus AV1 header, tile and quantiser index state. */
> +typedef struct VAAPIEncodeAV1Context {
> +    VAAPIEncodeContext common; /**< must stay first: avctx->priv_data is read as both VAAPIEncodeContext * and VAAPIEncodeAV1Context *. */
> +    AV1RawOBU sh; /**< sequence header OBU; the frame header writer reads its obu.sequence_header. */
> +    AV1RawOBU fh; /**< frame header OBU; the frame header writer reads its header and obu.frame.header. */
> +    CodedBitstreamContext *cbc; /**< CBS context created by ff_cbs_init() in vaapi_encode_av1_configure(). */
> +    CodedBitstreamFragment current_obu; /**< NOTE(review): not used in the visible part of the patch. */
> +    VAConfigAttribValEncAV1 attr;
> +    VAConfigAttribValEncAV1Ext1 attr_ext1;
> +    VAConfigAttribValEncAV1Ext2 attr_ext2; /**< bits.obu_size_bytes_minus1 gives the byte length of the OBU size field patched into the frame header. */
> +
> +    char sh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded sequence header data. */
> +    size_t sh_data_len; /**< bit length of sh_data. */
> +    char fh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded frame header data. */
> +    size_t fh_data_len; /**< bit length of fh_data. */
> +
> +    uint8_t uniform_tile;
> +    uint8_t use_128x128_superblock;
> +    int sb_cols; /**< end bound of the non-uniform tile column loop in the frame header writer; presumably frame width in superblocks - confirm. */
> +    int sb_rows; /**< end bound of the non-uniform tile row loop in the frame header writer; presumably frame height in superblocks - confirm. */
> +    int tile_cols_log2; /**< written relative to min/max_log2_tile_cols when tile spacing is uniform. */
> +    int tile_rows_log2; /**< written relative to min/max_log2_tile_rows when tile spacing is uniform. */
> +    int max_tile_width_sb; /**< caps the range passed to write_ns() for each non-uniform tile width. */
> +    int max_tile_height_sb; /**< caps the range passed to write_ns() for each non-uniform tile height. */
> +    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
> +    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
> +
> +    int min_log2_tile_cols; /**< number of leading tile column bits that are not written in tile_info(). */
> +    int max_log2_tile_cols; /**< a terminating 0 bit is written only while tile_cols_log2 is below this. */
> +    int min_log2_tile_rows; /**< number of leading tile row bits that are not written in tile_info(). */
> +    int max_log2_tile_rows; /**< a terminating 0 bit is written only while tile_rows_log2 is below this. */
> +
> +    int q_idx_idr; /**< set in vaapi_encode_av1_configure(); clipped to [0, AV1_MAX_QUANT]. */
> +    int q_idx_p; /**< set in vaapi_encode_av1_configure(); base value the other two are derived from. */
> +    int q_idx_b; /**< set in vaapi_encode_av1_configure(); clipped to [0, AV1_MAX_QUANT]. */
> +
> +    /** user options */
> +    int profile; /**< documented in encoders.texi as setting seq_profile. */
> +    int level; /**< documented in encoders.texi as setting seq_level_idx. */
> +    int tier; /**< documented in encoders.texi as setting seq_tier. */
> +    int tile_cols, tile_rows; /**< presumably filled from the "tiles" option (columns x rows) - confirm in the option table. */
> +    int tile_groups; /**< presumably filled from the "tile_groups" option - confirm in the option table. */
> +} VAAPIEncodeAV1Context;
> +/** Create the AV1 CBS context and choose q_idx_idr/q_idx_p/q_idx_b; returns 0, or the negative ff_cbs_init() result. */
> +static av_cold int vaapi_encode_av1_configure(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeContext     *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data; /* same pointer as ctx, viewed as the AV1-specific context */
> +    int ret;
> +
> +    ret = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, avctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (ctx->rc_mode->quality) { /* the rc mode carries a quality value: derive all three indices from rc_quality */
> +        priv->q_idx_p = av_clip(ctx->rc_quality, 0, AV1_MAX_QUANT);
> +        if (fabs(avctx->i_quant_factor) > 0.0) /* a zero factor means "reuse the P index" (else branch) */
> +            priv->q_idx_idr =
> +                av_clip((fabs(avctx->i_quant_factor) * priv->q_idx_p  +
> +                         avctx->i_quant_offset) + 0.5, /* + 0.5: presumably rounds to nearest on conversion to integer - confirm av_clip() argument type */
> +                        0, AV1_MAX_QUANT);
> +        else
> +            priv->q_idx_idr = priv->q_idx_p;
> +
> +        if (fabs(avctx->b_quant_factor) > 0.0) /* same derivation as above, using the B-frame factor and offset */
> +            priv->q_idx_b =
> +                av_clip((fabs(avctx->b_quant_factor) * priv->q_idx_p  +
> +                         avctx->b_quant_offset) + 0.5,
> +                        0, AV1_MAX_QUANT);
> +        else
> +            priv->q_idx_b = priv->q_idx_p;
> +    } else {
> +        /** Arbitrary value */
> +        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128;
> +    }
> +
> +    return 0;
> +}
> +/** Insert obu_unit as a unit of the given type into fragment au; returns 0, or the negative ff_cbs_insert_unit_content() result after logging it. */
> +static int vaapi_encode_av1_add_obu(AVCodecContext *avctx,
> +                                    CodedBitstreamFragment *au,
> +                                    uint8_t type,
> +                                    void *obu_unit)
> +{
> +    int ret;
> +
> +    ret = ff_cbs_insert_unit_content(au, -1, /* position -1: presumably "append at the end" per the CBS API - confirm */
> +                                     type, obu_unit, NULL);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: "
> +               "type = %d.\n", type);
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +/** Serialise fragment bs with the encoder's CBS context, copy the bytes to data and store the length in bits in *data_len; returns 0 or a negative error. */
> +static int vaapi_encode_av1_write_obu(AVCodecContext *avctx,
> +                                      char *data, size_t *data_len,
> +                                      CodedBitstreamFragment *bs)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +    int ret;
> +
> +    ret = ff_cbs_write_fragment_data(priv->cbc, bs);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
> +        return ret;
> +    }
> +
> +    if ((size_t)8 * MAX_PARAM_BUFFER_SIZE < 8 * bs->data_size - bs->data_bit_padding) { /* both sides are in bits; data is assumed to hold MAX_PARAM_BUFFER_SIZE bytes */
> +        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
> +               "%zu < %zu.\n", (size_t)8 * MAX_PARAM_BUFFER_SIZE,
> +               8 * bs->data_size - bs->data_bit_padding);
> +        return AVERROR(ENOSPC);
> +    }
> +
> +    memcpy(data, bs->data, bs->data_size); /* copies whole bytes, including the padding bits of the last byte */
> +    *data_len = 8 * bs->data_size - bs->data_bit_padding; /* reported length is in bits, excluding the padding */
> +
> +    return 0;
> +}
> +/** Signed distance a - b between two order hints, wrapped to (order_hint_bits_minus_1 + 1) bits; 0 when seq->enable_order_hint is unset. */
> +static int get_relative_dist(const AV1RawSequenceHeader *seq,
> +                             unsigned int a, unsigned int b)
> +{
> +    unsigned int diff, m;
> +    if (!seq->enable_order_hint)
> +        return 0;
> +    diff = a - b; /* unsigned subtraction: wraps instead of overflowing */
> +    m = 1 << seq->order_hint_bits_minus_1; /* mask of the top (sign) bit of the order hint width */
> +    diff = (diff & (m - 1)) - (diff & m); /* low bits minus the sign bit's weight, i.e. sign-extend from bit order_hint_bits_minus_1 */
> +    return diff; /* unsigned result is converted to int on return */
> +}
> +/** Write value (expected to be below n) to pbc in w - 1 or w bits as computed below; always returns 0. Presumably the AV1 ns(n) coding - confirm against the spec. */
> +static int write_ns(PutBitContext *pbc, uint32_t n, uint32_t value)
> +{
> +    uint32_t w, m, v, extra_bit;
> +
> +    w = av_log2(n) + 1;
> +    m = (1 << w) - n; /* values below m take the short form (w - 1 bits) */
> +
> +    if (value < m) {
> +        v = value;
> +        put_bits(pbc, w - 1, v);
> +    } else {
> +        v = m + ((value - m) >> 1); /* long form: prefix of w - 1 bits ... */
> +        extra_bit = (value - m) & 1; /* ... followed by the low bit of (value - m) */
> +        put_bits(pbc, w - 1, v);
> +        put_bits(pbc, 1, extra_bit);
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * This API provides the minimum implementation for the features currently
> + * enabled in the frame header. If more features are enabled in the future,
> + * please make sure the related feature flags are packed correctly into the
> + * frame header OBU in this API.
> + */
> +static int vaapi_encode_av1_write_frame_header(AVCodecContext *avctx,
> +                                               VAAPIEncodePicture *pic,
> +                                               char *data, size_t *data_len)
> +{
> +    VAAPIEncodeContext              *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> +    AV1RawOBU                    *fh_obu = &priv->fh;
> +    AV1RawOBU                    *sh_obu = &priv->sh;
> +    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
> +    AV1RawSequenceHeader             *sh = &sh_obu->obu.sequence_header;
> +    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
> +    PutBitContext pbc, pbc_tmp;
> +    uint8_t byte;
> +    int qindex, coded_lossless;
> +    int id_len, frame_is_intra, skip_mode_allowed;
> +    int start, payload_bits, obu_size, obu_size_len;
> +    int qindex_offset, loopfilter_offset;
> +    int cdef_start_offset, cdef_end_offset;
> +    int i;
> +
> +    init_put_bits(&pbc, data, MAX_PARAM_BUFFER_SIZE);
> +
> +    /** obu header */
> +    put_bits(&pbc, 1, fh_obu->header.obu_forbidden_bit);
> +    put_bits(&pbc, 4, fh_obu->header.obu_type);
> +    put_bits(&pbc, 1, fh_obu->header.obu_extension_flag);
> +    put_bits(&pbc, 1, fh_obu->header.obu_has_size_field);
> +    put_bits(&pbc, 1, fh_obu->header.obu_reserved_1bit);
> +
> +    /** record pbc status to re-write obu size later. */
> +    if (fh_obu->header.obu_has_size_field) {
> +        pbc_tmp = pbc;
> +        put_bits32(&pbc, 0);
> +        put_bits32(&pbc, 0);
> +    }
> +
> +    start = put_bits_count(&pbc);
> +
> +    /** uncompressed_header() */
> +    if (sh->frame_id_numbers_present_flag)
> +        id_len = sh->additional_frame_id_length_minus_1 +
> +                 sh->delta_frame_id_length_minus_2 + 3;
> +
> +    frame_is_intra = (fh->frame_type == AV1_FRAME_KEY ||
> +                      fh->frame_type == AV1_FRAME_INTRA_ONLY);
> +    if (!sh->reduced_still_picture_header) {
> +        put_bits(&pbc, 1, fh->show_existing_frame);
> +        if (fh->show_existing_frame) {
> +            put_bits(&pbc, 3, fh->frame_to_show_map_idx);
> +            goto trailing_bits;
> +        }
> +        if (sh->frame_id_numbers_present_flag)
> +            put_bits(&pbc, id_len, fh->display_frame_id);
> +
> +        put_bits(&pbc, 2, fh->frame_type);
> +        put_bits(&pbc, 1, fh->show_frame);
> +        if (!fh->show_frame)
> +            put_bits(&pbc, 1, fh->showable_frame);
> +
> +        if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> +            (fh->frame_type == AV1_FRAME_KEY && fh->show_frame)))
> +            put_bits(&pbc, 1, fh->error_resilient_mode);
> +    }
> +
> +    put_bits(&pbc, 1, fh->disable_cdf_update);
> +    if (sh->seq_force_screen_content_tools == AV1_SELECT_SCREEN_CONTENT_TOOLS)
> +        put_bits(&pbc, 1, fh->allow_screen_content_tools);
> +
> +    if (fh->allow_screen_content_tools && sh->seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
> +        put_bits(&pbc, 1, fh->force_integer_mv);
> +
> +    if (sh->frame_id_numbers_present_flag)
> +        put_bits(&pbc, id_len, fh->current_frame_id);
> +
> +    if (fh->frame_type != AV1_FRAME_SWITCH && !sh->reduced_still_picture_header)
> +        put_bits(&pbc, 1, fh->frame_size_override_flag);
> +
> +    if (sh->enable_order_hint)
> +        put_bits(&pbc, sh->order_hint_bits_minus_1 + 1, fh->order_hint);
> +
> +    if (!(frame_is_intra || fh->error_resilient_mode))
> +        put_bits(&pbc, 3, fh->primary_ref_frame);
> +
> +    if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> +        fh->frame_type == AV1_FRAME_KEY && fh->show_frame))
> +        put_bits(&pbc, 8, fh->refresh_frame_flags);
> +
> +    if (frame_is_intra) {
> +        /** render_size() */
> +         put_bits(&pbc, 1, fh->render_and_frame_size_different);
> +    } else {
> +        if (!frame_is_intra && sh->enable_order_hint)
> +            put_bits(&pbc, 1, fh->frame_refs_short_signaling);
> +
> +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +            if (!fh->frame_refs_short_signaling)
> +                put_bits(&pbc, 3, fh->ref_frame_idx[i]);
> +        }
> +
> +        if (!(fh->frame_size_override_flag && !fh->error_resilient_mode))
> +            put_bits(&pbc, 1, fh->render_and_frame_size_different);
> +
> +        if (!fh->force_integer_mv)
> +            put_bits(&pbc, 1, fh->allow_high_precision_mv);
> +
> +        /** read_interpolation_filter() */
> +        put_bits(&pbc, 1, fh->is_filter_switchable);
> +        if (!fh->is_filter_switchable)
> +            put_bits(&pbc, 2, fh->interpolation_filter);
> +
> +        put_bits(&pbc, 1, fh->is_motion_mode_switchable);
> +    }
> +
> +    if (!(sh->reduced_still_picture_header || fh->disable_cdf_update))
> +        put_bits(&pbc, 1, fh->disable_frame_end_update_cdf);
> +
> +    /** tile_info() */
> +    put_bits(&pbc, 1, fh->uniform_tile_spacing_flag);
> +    if (fh->uniform_tile_spacing_flag) {
> +        for (i = 0; i < priv->tile_cols_log2 - priv->min_log2_tile_cols; i++) {
> +            put_bits(&pbc, 1, 1);
> +        }
> +        if (priv->tile_cols_log2 < priv->max_log2_tile_cols)
> +            put_bits(&pbc, 1, 0);
> +
> +        for (i = 0; i < priv->tile_rows_log2 - priv->min_log2_tile_rows; i++) {
> +            put_bits(&pbc, 1, 1);
> +        }
> +        if (priv->tile_rows_log2 < priv->max_log2_tile_rows)
> +            put_bits(&pbc, 1, 0);
> +    } else {
> +        int start_sb = 0;
> +        int max_width, max_height;
> +        for (i = 0; start_sb < priv->sb_cols; i++) {
> +            max_width = FFMIN(priv->sb_cols - start_sb, priv->max_tile_width_sb);
> +            write_ns(&pbc, max_width, fh->width_in_sbs_minus_1[i]);
> +            start_sb += fh->width_in_sbs_minus_1[i] + 1;
> +        }
> +
> +        start_sb = 0;
> +        for (i = 0; start_sb < priv->sb_rows; i++) {
> +            max_height = FFMIN(priv->sb_rows - start_sb, priv->max_tile_height_sb);
> +            write_ns(&pbc, max_height, fh->height_in_sbs_minus_1[i]);
> +            start_sb += fh->height_in_sbs_minus_1[i] + 1;
> +        }
> +    }
> +
> +    if (priv->tile_cols_log2 || priv->tile_rows_log2) {
> +        put_bits(&pbc, priv->tile_cols_log2 + priv->tile_rows_log2, fh->context_update_tile_id);
> +        put_bits(&pbc, 2, fh->tile_size_bytes_minus1);
> +    }
> +
> +    qindex_offset = put_bits_count(&pbc);
> +    /** quantization_params() */
> +    put_bits(&pbc, 8, fh->base_q_idx);
> +    put_bits(&pbc, 1, fh->delta_q_y_dc);
> +    put_bits(&pbc, 1, fh->delta_q_u_dc);
> +    put_bits(&pbc, 1, fh->delta_q_u_ac);
> +    put_bits(&pbc, 1, fh->using_qmatrix);
> +
> +    /** segmentation_params() */
> +    put_bits(&pbc, 1, fh->segmentation_enabled);
> +
> +    /** delta_q_params() */
> +    if (fh->base_q_idx)
> +        put_bits(&pbc, 1, fh->delta_q_present);
> +    if (fh->delta_q_present)
> +        put_bits(&pbc, 2, fh->delta_q_res);
> +
> +    /** delta_lf_params() */
> +    if (fh->delta_q_present) {
> +        if (!fh->allow_intrabc)
> +            put_bits(&pbc, 1, fh->delta_lf_present);
> +        if (fh->delta_lf_present) {
> +            put_bits(&pbc, 2, fh->delta_lf_res);
> +            put_bits(&pbc, 1, fh->delta_lf_multi);
> +        }
> +    }
> +
> +    /** codelossless */
> +    coded_lossless = 1;
> +    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
> +        if (fh->segmentation_enabled && fh->feature_enabled[i][AV1_SEG_LVL_ALT_Q])
> +            qindex = fh->base_q_idx + fh->feature_value[i][AV1_SEG_LVL_ALT_Q];
> +        else
> +            qindex = fh->base_q_idx;
> +        qindex = av_clip_uintp2(qindex, 8);
> +
> +        if (qindex || fh->delta_q_y_dc || fh->delta_q_u_ac || fh->delta_q_u_dc ||
> +            fh->delta_q_v_ac || fh->delta_q_v_dc)
> +            coded_lossless = 0;
> +    }
> +
> +    loopfilter_offset = put_bits_count(&pbc);
> +    /** loop_filter_params() */
> +    if (!(coded_lossless || fh->allow_intrabc)) {
> +        put_bits(&pbc, 6, fh->loop_filter_level[0]);
> +        put_bits(&pbc, 6, fh->loop_filter_level[1]);
> +        if (fh->loop_filter_level[0] || fh->loop_filter_level[1]) {
> +            put_bits(&pbc, 6, fh->loop_filter_level[2]);
> +            put_bits(&pbc, 6, fh->loop_filter_level[3]);
> +        }
> +        put_bits(&pbc, 3, fh->loop_filter_sharpness);
> +        put_bits(&pbc, 1, fh->loop_filter_delta_enabled);
> +    }
> +
> +    cdef_start_offset = put_bits_count(&pbc);
> +    /** cdef_params() */
> +    if (!(coded_lossless || fh->allow_intrabc || !sh->enable_cdef)) {
> +        put_bits(&pbc, 2, fh->cdef_damping_minus_3);
> +        put_bits(&pbc, 2, fh->cdef_bits);
> +        for (i = 0; i < (1 << fh->cdef_bits); i++) {
> +            put_bits(&pbc, 4, fh->cdef_y_pri_strength[i]);
> +            put_bits(&pbc, 2, fh->cdef_y_sec_strength[i]);
> +            put_bits(&pbc, 4, fh->cdef_uv_pri_strength[i]);
> +            put_bits(&pbc, 2, fh->cdef_uv_sec_strength[i]);
> +        }
> +    }
> +    cdef_end_offset = put_bits_count(&pbc);
> +
> +    /** read_tx_mode() */
> +    if (fh->tx_mode == AV1_TX_MODE_SELECT)
> +        put_bits(&pbc, 1, 1);
> +    else
> +        put_bits(&pbc, 1, 0);
> +
> +    /** frame_reference_mode() */
> +    if (!frame_is_intra)
> +        put_bits(&pbc, 1, fh->reference_select);
> +
> +    /** skip_mode_params() */
> +    if (frame_is_intra || !fh->reference_select || !sh->enable_order_hint)
> +        skip_mode_allowed = 0;
> +    else {
> +        int forward_idx, backward_idx;
> +        int ref_hint, forward_hint, backward_hint;
> +
> +        forward_idx = -1;
> +        backward_idx = -1;
> +
> +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +            ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> +            if (get_relative_dist(sh, ref_hint, fh->order_hint) < 0) {
> +                if (forward_idx < 0 || get_relative_dist(sh, ref_hint, forward_idx) > 0) {
> +                    forward_idx = i;
> +                    forward_hint = ref_hint;
> +                }
> +            } else if (get_relative_dist(sh, ref_hint, fh->order_hint) > 0) {
> +                if (backward_idx < 0 || get_relative_dist(sh, ref_hint, backward_hint) < 0) {
> +                    backward_idx = i;
> +                    backward_hint = ref_hint;
> +                }
> +            }
> +        }
> +        if (forward_idx < 0)
> +            skip_mode_allowed = 0;
> +        else if (backward_idx >= 0)
> +            skip_mode_allowed = 1;
> +        else {
> +            int second_forward_idx, second_forward_hint;
> +            second_forward_idx = -1;
> +            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +                ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> +                if (get_relative_dist(sh, ref_hint, forward_hint) < 0) {
> +                    if (second_forward_idx < 0 || get_relative_dist(sh, ref_hint, second_forward_hint) > 0){
> +                        second_forward_idx = i;
> +                        second_forward_hint = ref_hint;
> +                    }
> +                }
> +            }
> +            if (second_forward_idx < 0)
> +                skip_mode_allowed = 0;
> +            else
> +                skip_mode_allowed = 1;
> +        }
> +    }
> +
> +    if (skip_mode_allowed)
> +        put_bits(&pbc, 1, fh->skip_mode_present);
> +
> +    put_bits(&pbc, 1, fh->reduced_tx_set);
> +
> +    /** global_motion_params() */
> +    if (!frame_is_intra) {
> +        for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
> +            put_bits(&pbc, 1, fh->is_global[i]);
> +            if (fh->is_global[i]) {
> +                put_bits(&pbc, 1, fh->is_rot_zoom[i]);
> +                if (!fh->is_rot_zoom[i])
> +                    put_bits(&pbc, 1, fh->is_translation[i]);
> +            }
> +        }
> +    }
> +
> +trailing_bits:
> +    payload_bits = put_bits_count(&pbc) - start;
> +
> +    /** trailing_bits() */
> +    put_bits(&pbc, 1, 1);
> +    obu_size = (put_bits_count(&pbc) - start + 7) / 8;
> +    for (i = 0; i < obu_size * 8 - payload_bits - 1; i++)
> +        put_bits(&pbc, 1, 0);
> +
> +    flush_put_bits(&pbc);
> +    *data_len = put_bits_count(&pbc);
> +
> +    /** update obu size in bitstream */
> +    if (fh_obu->header.obu_has_size_field) {
> +        obu_size_len = priv->attr_ext2.bits.obu_size_bytes_minus1 + 1;
> +        for (i = 0; i < obu_size_len; i++) {
> +            byte = obu_size >> (7 * i) & 0x7f;
> +            if (i < obu_size_len - 1)
> +                byte |= 0x80;
> +            put_bits(&pbc_tmp, 8, byte);
> +        }
> +        flush_put_bits(&pbc_tmp);
> +        memmove(pbc_tmp.buf_ptr, pbc_tmp.buf_ptr + (8 - obu_size_len), obu_size);
> +        *data_len -= (8 - obu_size_len) * 8;
> +    }
> +
> +    if (fh->show_existing_frame)
> +        return 0;
> +
> +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> +        vpic->min_base_qindex = av_clip(avctx->qmin, 1, AV1_MAX_QUANT);
> +        vpic->max_base_qindex = av_clip(avctx->qmax, 1, AV1_MAX_QUANT);
> +
> +        vpic->bit_offset_qindex            = qindex_offset - (8 - obu_size_len) * 8;
> +        vpic->bit_offset_loopfilter_params = loopfilter_offset - (8 - obu_size_len) * 8;
> +        vpic->bit_offset_cdef_params       = cdef_start_offset - (8 - obu_size_len) * 8;
> +        vpic->size_in_bits_cdef_params     = cdef_end_offset - cdef_start_offset;
> +        vpic->size_in_bits_frame_hdr_obu   = *data_len;
> +
> +        vpic->byte_offset_frame_hdr_obu_size = (((pic->type == PICTURE_TYPE_IDR) ?
> +                                               priv->sh_data_len / 8 : 0) +
> +                                               (fh_obu->header.obu_extension_flag ?
> +                                               2 : 1));
> +    }
> +
> +    return 0;
> +}
> +
> +static int tile_log2(int blkSize, int target) {
> +    int k;
> +    for (k = 0; (blkSize << k) < target; k++);
> +    return k;
> +}
> +
> +static int vaapi_encode_av1_set_tile(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +    int mi_cols, mi_rows, sb_shift, sb_size;
> +    int max_tile_area_sb, max_tile_area_sb_varied;
> +    int tile_width_sb, tile_height_sb, widest_tile_sb;
> +    int min_log2_tiles;
> +    int tile_rows_tmp, i;
> +
> +    if (priv->tile_cols > AV1_MAX_TILE_COLS ||
> +        priv->tile_rows > AV1_MAX_TILE_ROWS) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d, should less than %dx%d.\n",
> +               priv->tile_cols, priv->tile_rows, AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    mi_cols = 2 * ((avctx->width + 7) >> 3);
> +    mi_rows = 2 * ((avctx->height + 7) >> 3);
> +    priv->sb_cols = priv->use_128x128_superblock ?
> +                    ((mi_cols + 31) >> 5) : ((mi_cols + 15) >> 4);
> +    priv->sb_rows = priv->use_128x128_superblock ?
> +                    ((mi_rows + 31) >> 5) : ((mi_rows + 15) >> 4);
> +    sb_shift = priv->use_128x128_superblock ? 5 : 4;
> +    sb_size  = sb_shift + 2;
> +    priv->max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
> +    max_tile_area_sb = AV1_MAX_TILE_AREA  >> (2 * sb_size);
> +
> +    priv->min_log2_tile_cols = tile_log2(priv->max_tile_width_sb, priv->sb_cols);
> +    priv->max_log2_tile_cols = tile_log2(1, FFMIN(priv->sb_cols, AV1_MAX_TILE_COLS));
> +    priv->max_log2_tile_rows = tile_log2(1, FFMIN(priv->sb_rows, AV1_MAX_TILE_ROWS));
> +    min_log2_tiles = FFMAX(priv->min_log2_tile_cols,
> +                           tile_log2(max_tile_area_sb, priv->sb_rows * priv->sb_cols));
> +
> +    if (priv->tile_cols != av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols)) {
> +        priv->tile_cols = av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols);
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile cols, correct to %d.\n", priv->tile_cols);
> +    }
> +
> +    priv->tile_cols_log2 = tile_log2(1, priv->tile_cols);
> +    tile_width_sb = (priv->sb_cols + (1 << priv->tile_cols_log2) - 1) >>
> +                    priv->tile_cols_log2;
> +
> +    if (priv->tile_rows > priv->sb_rows) {
> +        priv->tile_rows = priv->sb_rows;
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
> +    }
> +
> +    for (tile_rows_tmp = priv->tile_rows; tile_rows_tmp <= priv->sb_rows && tile_rows_tmp <= AV1_MAX_TILE_ROWS; tile_rows_tmp++) {
> +        /** try uniformed tile. */
> +        priv->tile_rows_log2 = tile_log2(1, tile_rows_tmp);
> +        if ((priv->sb_cols + tile_width_sb - 1) / tile_width_sb == priv->tile_cols) {
> +            for (i = 0; i < priv->tile_cols - 1; i++)
> +                priv->width_in_sbs_minus_1[i] = tile_width_sb - 1;
> +            priv->width_in_sbs_minus_1[i] = priv->sb_cols - (priv->tile_cols - 1) * tile_width_sb - 1;
> +
> +            tile_height_sb = (priv->sb_rows + (1 << priv->tile_rows_log2) - 1) >>
> +                             priv->tile_rows_log2;
> +
> +            if ((priv->sb_rows + tile_height_sb - 1) / tile_height_sb == tile_rows_tmp &&
> +                tile_height_sb <= max_tile_area_sb / tile_width_sb) {
> +                for (i = 0; i < tile_rows_tmp - 1; i++)
> +                    priv->height_in_sbs_minus_1[i] = tile_height_sb - 1;
> +                priv->height_in_sbs_minus_1[i] = priv->sb_rows - (tile_rows_tmp - 1) * tile_height_sb - 1;
> +
> +                priv->uniform_tile = 1;
> +                priv->min_log2_tile_rows = FFMAX(min_log2_tiles - priv->tile_cols_log2, 0);
> +
> +                break;
> +            }
> +        }
> +
> +        /** try non-uniformed tile. */
> +        widest_tile_sb = 0;
> +        for (i = 0; i < priv->tile_cols; i++) {
> +            priv->width_in_sbs_minus_1[i] = (i + 1) * priv->sb_cols / priv->tile_cols - i * priv->sb_cols / priv->tile_cols - 1;
> +            widest_tile_sb = FFMAX(widest_tile_sb, priv->width_in_sbs_minus_1[i] + 1);
> +        }
> +
> +        if (min_log2_tiles)
> +            max_tile_area_sb_varied = (priv->sb_rows * priv->sb_cols) >> (min_log2_tiles + 1);
> +        else
> +            max_tile_area_sb_varied = priv->sb_rows * priv->sb_cols;
> +        priv->max_tile_height_sb = FFMAX(1, max_tile_area_sb_varied / widest_tile_sb);
> +
> +        if (tile_rows_tmp == av_clip(tile_rows_tmp, (priv->sb_rows + priv->max_tile_height_sb - 1) / priv->max_tile_height_sb, priv->sb_rows)) {
> +            for (i = 0; i < tile_rows_tmp; i++)
> +                priv->height_in_sbs_minus_1[i] = (i + 1) * priv->sb_rows / tile_rows_tmp - i * priv->sb_rows / tile_rows_tmp - 1;
> +
> +            break;
> +        }
> +    }
> +
> +    if (priv->tile_rows != tile_rows_tmp) {
> +        priv->tile_rows = tile_rows_tmp;
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
> +    }
> +
> +    /** check if tile cols/rows is supported by driver. */
> +    if (priv->attr_ext2.bits.max_tile_num_minus1) {
> +        if ((priv->tile_cols * priv->tile_rows - 1) > priv->attr_ext2.bits.max_tile_num_minus1) {
> +            av_log(avctx, AV_LOG_ERROR, "Unsupported tile num %d * %d = %d by driver, "
> +                   "should be less than %d.\n", priv->tile_cols, priv->tile_rows,
> +                   priv->tile_cols * priv->tile_rows,
> +                   priv->attr_ext2.bits.max_tile_num_minus1 + 1);
> +            return AVERROR(EINVAL);
> +        }
> +    }
> +    av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to %d/%d.\n",
> +           priv->tile_cols, priv->tile_rows);
> +
> +    /** check if tile group numbers is valid. */
> +    if (priv->tile_groups > priv->tile_cols * priv->tile_rows) {
> +        av_log(avctx, AV_LOG_WARNING, "Invalid tile groups number %d, "
> +        "correct to %d.\n", priv->tile_groups, priv->tile_cols * priv->tile_rows);
> +        priv->tile_groups = priv->tile_cols * priv->tile_rows;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_write_sequence_header(AVCodecContext *avctx,
> +                                                  char *data, size_t *data_len)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +
> +    memcpy(data, &priv->sh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
> +    *data_len = priv->sh_data_len;
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_init_sequence_params(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeContext               *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context           *priv = avctx->priv_data;
> +    AV1RawOBU                     *sh_obu = &priv->sh;
> +    AV1RawSequenceHeader              *sh = &sh_obu->obu.sequence_header;
> +    VAEncSequenceParameterBufferAV1 *vseq = ctx->codec_sequence_params;
> +    CodedBitstreamFragment           *obu = &priv->current_obu;
> +    const AVPixFmtDescriptor *desc;
> +    int ret;
> +
> +    memset(sh_obu, 0, sizeof(*sh_obu));
> +    sh_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER;
> +
> +    desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
> +    av_assert0(desc);
> +
> +    sh->seq_profile  = avctx->profile;
> +    if (!sh->seq_force_screen_content_tools)
> +        sh->seq_force_integer_mv = AV1_SELECT_INTEGER_MV;
> +    sh->frame_width_bits_minus_1  = av_log2(avctx->width);
> +    sh->frame_height_bits_minus_1 = av_log2(avctx->height);
> +    sh->max_frame_width_minus_1   = avctx->width - 1;
> +    sh->max_frame_height_minus_1  = avctx->height - 1;
> +    sh->seq_tier[0]               = priv->tier;
> +    /** enable order hint and reserve maximum 8 bits for it by default. */
> +    sh->enable_order_hint         = 1;
> +    sh->order_hint_bits_minus_1   = 7;
> +
> +    sh->color_config = (AV1RawColorConfig) {
> +        .high_bitdepth                  = desc->comp[0].depth == 8 ? 0 : 1,
> +        .color_primaries                = avctx->color_primaries,
> +        .transfer_characteristics       = avctx->color_trc,
> +        .matrix_coefficients            = avctx->colorspace,
> +        .color_description_present_flag = (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
> +                                           avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
> +                                           avctx->colorspace      != AVCOL_SPC_UNSPECIFIED),

This should also set color_range:

        .color_range                    = avctx->color_range == AVCOL_RANGE_JPEG,


> +        .subsampling_x                  = desc->log2_chroma_w,
> +        .subsampling_y                  = desc->log2_chroma_h,
> +    };
> +
> +    if (avctx->level != FF_LEVEL_UNKNOWN) {
> +        sh->seq_level_idx[0] = avctx->level;
> +    } else {
> +        const AV1LevelDescriptor *level;
> +        float framerate;
> +
> +        if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
> +            framerate = avctx->framerate.num / avctx->framerate.den;
> +        else
> +            framerate = 0;
> +
> +        level = ff_av1_guess_level(avctx->bit_rate, priv->tier,
> +                                   ctx->surface_width, ctx->surface_height,
> +                                   priv->tile_rows * priv->tile_cols,
> +                                   priv->tile_cols, framerate);
> +        if (level) {
> +            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
> +            sh->seq_level_idx[0] = level->level_idx;
> +        } else {
> +            av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to "
> +                   "any normal level, using level 6.3 by default.\n");
> +            sh->seq_level_idx[0] = 19;
> +            sh->seq_tier[0] = 1;
> +        }
> +    }
> +    vseq->seq_profile             = sh->seq_profile;
> +    vseq->seq_level_idx           = sh->seq_level_idx[0];
> +    vseq->seq_tier                = sh->seq_tier[0];
> +    vseq->order_hint_bits_minus_1 = sh->order_hint_bits_minus_1;
> +    vseq->intra_period            = ctx->gop_size;
> +    vseq->ip_period               = ctx->b_per_p + 1;
> +
> +    vseq->seq_fields.bits.enable_order_hint = sh->enable_order_hint;
> +
> +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> +        vseq->bits_per_second = ctx->va_bit_rate;
> +        vseq->seq_fields.bits.enable_cdef = sh->enable_cdef = 1;
> +    }
> +
> +    ret = vaapi_encode_av1_add_obu(avctx, obu, AV1_OBU_SEQUENCE_HEADER, &priv->sh);
> +    if (ret < 0)
> +        goto end;
> +
> +    ret = vaapi_encode_av1_write_obu(avctx, priv->sh_data, &priv->sh_data_len, obu);
> +    if (ret < 0)
> +        goto end;
> +
> +end:
> +    ff_cbs_fragment_reset(obu);
> +    return ret;
> +}
> +
> +static int vaapi_encode_av1_init_picture_params(AVCodecContext *avctx,
> +                                                VAAPIEncodePicture *pic)
> +{
> +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> +    VAAPIEncodeAV1Picture          *hpic = pic->priv_data;
> +    AV1RawOBU                    *fh_obu = &priv->fh;
> +    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
> +    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
> +    CodedBitstreamFragment          *obu = &priv->current_obu;
> +    VAAPIEncodePicture    *ref;
> +    VAAPIEncodeAV1Picture *href;
> +    int slot, i;
> +    int ret;
> +    static const int8_t default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
> +        { 1, 0, 0, 0, -1, 0, -1, -1 };
> +
> +    memset(fh_obu, 0, sizeof(*fh_obu));
> +    pic->nb_slices = priv->tile_groups;
> +    fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> +    fh_obu->header.obu_has_size_field = 1;
> +
> +    switch (pic->type) {
> +    case PICTURE_TYPE_IDR:
> +        av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]);
> +        fh->frame_type = AV1_FRAME_KEY;
> +        fh->refresh_frame_flags = 0xFF;
> +        fh->base_q_idx = priv->q_idx_idr;
> +        hpic->slot = 0;
> +        hpic->last_idr_frame = pic->display_order;
> +        break;
> +    case PICTURE_TYPE_P:
> +        av_assert0(pic->nb_refs[0]);
> +        fh->frame_type = AV1_FRAME_INTER;
> +        fh->base_q_idx = priv->q_idx_p;
> +        ref = pic->refs[0][pic->nb_refs[0] - 1];
> +        href = ref->priv_data;
> +        hpic->slot = !href->slot;
> +        hpic->last_idr_frame = href->last_idr_frame;
> +        fh->refresh_frame_flags = 1 << hpic->slot;
> +
> +        /** set the nearest frame in L0 as all reference frame. */
> +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> +            fh->ref_frame_idx[i] = href->slot;
> +        }
> +        fh->primary_ref_frame = href->slot;
> +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
> +
> +        /** set the 2nd nearest frame in L0 as Golden frame. */
> +        if (pic->nb_refs[0] > 1) {
> +            ref = pic->refs[0][pic->nb_refs[0] - 2];
> +            href = ref->priv_data;
> +            fh->ref_frame_idx[3] = href->slot;
> +            fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +            vpic->ref_frame_ctrl_l0.fields.search_idx1 = AV1_REF_FRAME_GOLDEN;
> +        }
> +        break;
> +    case PICTURE_TYPE_B:
> +        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
> +        fh->frame_type = AV1_FRAME_INTER;
> +        fh->base_q_idx = priv->q_idx_b;
> +        fh->refresh_frame_flags = 0x0;
> +        fh->reference_select = 1;
> +
> +        /** B frame will not be referenced, disable its recon frame. */
> +        vpic->picture_flags.bits.disable_frame_recon = 1;
> +
> +        /** Use LAST_FRAME and BWDREF_FRAME for reference. */
> +        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
> +        vpic->ref_frame_ctrl_l1.fields.search_idx0 = AV1_REF_FRAME_BWDREF;
> +
> +        ref                            = pic->refs[0][pic->nb_refs[0] - 1];
> +        href                           = ref->priv_data;
> +        hpic->last_idr_frame           = href->last_idr_frame;
> +        fh->primary_ref_frame          = href->slot;
> +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +        for (i = 0; i < AV1_REF_FRAME_GOLDEN; i++) {
> +            fh->ref_frame_idx[i] = href->slot;
> +        }
> +
> +        ref                            = pic->refs[1][pic->nb_refs[1] - 1];
> +        href                           = ref->priv_data;
> +        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
> +        for (i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME; i++) {
> +            fh->ref_frame_idx[i] = href->slot;
> +        }
> +        break;
> +    default:
> +        av_assert0(0 && "invalid picture type");
> +    }
> +
> +    fh->show_frame                = pic->display_order <= pic->encode_order;
> +    fh->showable_frame            = fh->frame_type != AV1_FRAME_KEY;
> +    fh->frame_width_minus_1       = avctx->width - 1;
> +    fh->frame_height_minus_1      = avctx->height - 1;
> +    fh->render_width_minus_1      = fh->frame_width_minus_1;
> +    fh->render_height_minus_1     = fh->frame_height_minus_1;
> +    fh->order_hint                = pic->display_order - hpic->last_idr_frame;
> +    fh->tile_cols                 = priv->tile_cols;
> +    fh->tile_rows                 = priv->tile_rows;
> +    fh->tile_cols_log2            = priv->tile_cols_log2;
> +    fh->tile_rows_log2            = priv->tile_rows_log2;
> +    fh->uniform_tile_spacing_flag = priv->uniform_tile;
> +    fh->tile_size_bytes_minus1    = priv->attr_ext2.bits.tile_size_bytes_minus1;
> +    fh->reduced_tx_set            = 1;
> +
> +    /** ignore ONLY_4x4 mode for codedlossless is not fully implemented. */
> +    if (priv->attr_ext2.bits.tx_mode_support & 0x04)
> +        fh->tx_mode = AV1_TX_MODE_SELECT;
> +    else if (priv->attr_ext2.bits.tx_mode_support & 0x02)
> +        fh->tx_mode = AV1_TX_MODE_LARGEST;
> +    else
> +        return AVERROR(EINVAL);
> +
> +    for (i = 0; i < fh->tile_cols; i++)
> +        fh->width_in_sbs_minus_1[i] = vpic->width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i];
> +
> +    for (i = 0; i < fh->tile_rows; i++)
> +        fh->height_in_sbs_minus_1[i] = vpic->height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i];
> +
> +    memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
> +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> +
> +    if (fh->frame_type == AV1_FRAME_KEY && fh->show_frame) {
> +        fh->error_resilient_mode = 1;
> +    }
> +
> +    if (fh->frame_type == AV1_FRAME_KEY || fh->error_resilient_mode)
> +        fh->primary_ref_frame = AV1_PRIMARY_REF_NONE;
> +
> +    vpic->base_qindex          = fh->base_q_idx;
> +    vpic->frame_width_minus_1  = fh->frame_width_minus_1;
> +    vpic->frame_height_minus_1 = fh->frame_height_minus_1;
> +    vpic->primary_ref_frame    = fh->primary_ref_frame;
> +    vpic->reconstructed_frame  = pic->recon_surface;
> +    vpic->coded_buf            = pic->output_buffer;
> +    vpic->tile_cols            = fh->tile_cols;
> +    vpic->tile_rows            = fh->tile_rows;
> +    vpic->order_hint           = fh->order_hint;
> +#if VA_CHECK_VERSION(1, 15, 0)
> +    vpic->refresh_frame_flags  = fh->refresh_frame_flags;
> +#endif
> +
> +    vpic->picture_flags.bits.enable_frame_obu     = 0;
> +    vpic->picture_flags.bits.frame_type           = fh->frame_type;
> +    vpic->picture_flags.bits.reduced_tx_set       = fh->reduced_tx_set;
> +    vpic->picture_flags.bits.error_resilient_mode = fh->error_resilient_mode;
> +
> +    /** let driver decide to use single or compound reference prediction mode. */
> +    vpic->mode_control_flags.bits.reference_mode = fh->reference_select ? 2 : 0;
> +    vpic->mode_control_flags.bits.tx_mode = fh->tx_mode;
> +
> +    vpic->tile_group_obu_hdr_info.bits.obu_has_size_field = 1;
> +
> +    /** set reference. */
> +    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
> +        vpic->ref_frame_idx[i] = fh->ref_frame_idx[i];
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
> +        vpic->reference_frames[i] = VA_INVALID_SURFACE;
> +
> +    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> +        for (int j = 0; j < pic->nb_refs[i]; j++) {
> +            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
> +
> +            slot = ((VAAPIEncodeAV1Picture*)ref_pic->priv_data)->slot;
> +            av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
> +
> +            vpic->reference_frames[slot] = ref_pic->recon_surface;
> +        }
> +    }
> +
> +    /** pack frame header, and set va params offset like bit_offset_qindex etc. */
> +    ret = vaapi_encode_av1_write_frame_header(avctx, pic, priv->fh_data, &priv->fh_data_len);
> +    if (ret < 0)
> +        goto end;
> +
> +end:
> +    ff_cbs_fragment_reset(obu);
> +    return ret;
> +}
> +
> +static int vaapi_encode_av1_init_slice_params(AVCodecContext *avctx,
> +                                              VAAPIEncodePicture *pic,
> +                                              VAAPIEncodeSlice *slice)
> +{
> +    VAAPIEncodeAV1Context      *priv = avctx->priv_data;
> +    VAEncTileGroupBufferAV1  *vslice = slice->codec_slice_params;
> +    CodedBitstreamAV1Context  *cbctx = priv->cbc->priv_data;
> +    int div;
> +
> +    /** Set tile group info. */
> +    div = priv->tile_cols * priv->tile_rows / priv->tile_groups;
> +    vslice->tg_start = slice->index * div;
> +    if (slice->index == (priv->tile_groups - 1)) {
> +        vslice->tg_end = priv->tile_cols * priv->tile_rows - 1;
> +        cbctx->seen_frame_header = 0;
> +    } else {
> +        vslice->tg_end = (slice->index + 1) * div - 1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vaapi_encode_av1_write_picture_header(AVCodecContext *avctx,
> +                                                 VAAPIEncodePicture *pic,
> +                                                 char *data, size_t *data_len)
> +{
> +    VAAPIEncodeAV1Context     *priv = avctx->priv_data;
> +    CodedBitstreamFragment     *obu = &priv->current_obu;
> +    AV1RawOBU               *fh_obu = &priv->fh;
> +    AV1RawFrameHeader       *rep_fh = &fh_obu->obu.frame_header;
> +    VAAPIEncodeAV1Picture *href;
> +    int ret = 0;
> +
> +    pic->tail_size = 0;
> +    /** Pack repeat frame header. */
> +    if (pic->display_order > pic->encode_order) {
> +        memset(fh_obu, 0, sizeof(*fh_obu));
> +        href = pic->refs[0][pic->nb_refs[0] - 1]->priv_data;
> +        fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> +        fh_obu->header.obu_has_size_field = 1;
> +
> +        rep_fh->show_existing_frame   = 1;
> +        rep_fh->frame_to_show_map_idx = href->slot == 0;
> +        rep_fh->frame_type            = AV1_FRAME_INTER;
> +        rep_fh->frame_width_minus_1   = avctx->width - 1;
> +        rep_fh->frame_height_minus_1  = avctx->height - 1;
> +        rep_fh->render_width_minus_1  = rep_fh->frame_width_minus_1;
> +        rep_fh->render_height_minus_1 = rep_fh->frame_height_minus_1;
> +
> +        ret = vaapi_encode_av1_write_frame_header(avctx, pic, pic->tail_data, &pic->tail_size);
> +        if (ret < 0)
> +            goto end;
> +
> +        pic->tail_size /= 8;
> +    }
> +
> +    memcpy(data, &priv->fh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
> +    *data_len = priv->fh_data_len;
> +
> +end:
> +    ff_cbs_fragment_reset(obu);
> +    return ret;
> +}
> +
> +static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
> +    { FF_PROFILE_AV1_MAIN,  8, 3, 1, 1, VAProfileAV1Profile0 },
> +    { FF_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
> +    { FF_PROFILE_UNKNOWN }
> +};
> +
> +static const VAAPIEncodeType vaapi_encode_type_av1 = {
> +    .profiles        = vaapi_encode_av1_profiles,
> +    .flags           = FLAG_B_PICTURES,
> +    .default_quality = 25,
> +    .configure       = &vaapi_encode_av1_configure,
> +
> +    .sequence_header_type  = VAEncPackedHeaderSequence,
> +    .sequence_params_size  = sizeof(VAEncSequenceParameterBufferAV1),
> +    .init_sequence_params  = &vaapi_encode_av1_init_sequence_params,
> +    .write_sequence_header = &vaapi_encode_av1_write_sequence_header,
> +
> +    .picture_priv_data_size = sizeof(VAAPIEncodeAV1Picture),
> +    .picture_header_type    = VAEncPackedHeaderPicture,
> +    .picture_params_size    = sizeof(VAEncPictureParameterBufferAV1),
> +    .init_picture_params    = &vaapi_encode_av1_init_picture_params,
> +    .write_picture_header   = &vaapi_encode_av1_write_picture_header,
> +
> +    .slice_params_size = sizeof(VAEncTileGroupBufferAV1),
> +    .init_slice_params = &vaapi_encode_av1_init_slice_params,
> +};
> +
> +static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeContext      *ctx = avctx->priv_data;
> +    VAAPIEncodeAV1Context  *priv = avctx->priv_data;
> +    VAConfigAttrib attr;
> +    VAStatus vas;
> +    int ret;
> +
> +    ctx->codec = &vaapi_encode_type_av1;
> +
> +    ctx->desired_packed_headers =
> +        VA_ENC_PACKED_HEADER_SEQUENCE |
> +        VA_ENC_PACKED_HEADER_PICTURE;
> +
> +    if (avctx->profile == FF_PROFILE_UNKNOWN)
> +        avctx->profile = priv->profile;
> +    if (avctx->level == FF_LEVEL_UNKNOWN)
> +        avctx->level = priv->level;
> +
> +    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0x1f) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid level %d\n", avctx->level);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    ret = ff_vaapi_encode_init(avctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    attr.type = VAConfigAttribEncAV1;
> +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> +                                ctx->va_profile,
> +                                ctx->va_entrypoint,
> +                                &attr, 1);
> +    if (vas != VA_STATUS_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> +        return AVERROR_EXTERNAL;
> +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> +        priv->attr.value = 0;
> +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> +               "supported.\n", attr.type);
> +    } else {
> +        priv->attr.value = attr.value;
> +    }
> +
> +    attr.type = VAConfigAttribEncAV1Ext1;
> +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> +                                ctx->va_profile,
> +                                ctx->va_entrypoint,
> +                                &attr, 1);
> +    if (vas != VA_STATUS_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> +        return AVERROR_EXTERNAL;
> +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> +        priv->attr_ext1.value = 0;
> +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> +               "supported.\n", attr.type);
> +    } else {
> +        priv->attr_ext1.value = attr.value;
> +    }
> +
> +    /** This attr provides essential indicators, return error if not support. */
> +    attr.type = VAConfigAttribEncAV1Ext2;
> +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> +                                ctx->va_profile,
> +                                ctx->va_entrypoint,
> +                                &attr, 1);
> +    if (vas != VA_STATUS_SUCCESS || attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> +               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
> +        return AVERROR_EXTERNAL;
> +    } else {
> +        priv->attr_ext2.value = attr.value;
> +    }
> +
> +    ret = vaapi_encode_av1_set_tile(avctx);
> +    if (ret < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
> +static av_cold int vaapi_encode_av1_close(AVCodecContext *avctx)
> +{
> +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> +
> +    ff_cbs_fragment_free(&priv->current_obu);
> +    ff_cbs_close(&priv->cbc);
> +
> +    return ff_vaapi_encode_close(avctx);
> +}
> +
> +#define OFFSET(x) offsetof(VAAPIEncodeAV1Context, x)
> +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
> +
> +static const AVOption vaapi_encode_av1_options[] = {
> +    VAAPI_ENCODE_COMMON_OPTIONS,
> +    VAAPI_ENCODE_RC_OPTIONS,
> +    { "profile", "Set profile (seq_profile)",
> +      OFFSET(profile), AV_OPT_TYPE_INT,
> +      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff, FLAGS, "profile" },
> +
> +#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
> +    { .i64 = value }, 0, 0, FLAGS, "profile"
> +    { PROFILE("main",               FF_PROFILE_AV1_MAIN) },
> +    { PROFILE("high",               FF_PROFILE_AV1_HIGH) },
> +    { PROFILE("professional",       FF_PROFILE_AV1_PROFESSIONAL) },
> +#undef PROFILE
> +
> +    { "tier", "Set tier (seq_tier)",
> +      OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "tier" },
> +    { "main", NULL, 0, AV_OPT_TYPE_CONST,
> +      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
> +    { "high", NULL, 0, AV_OPT_TYPE_CONST,
> +      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
> +    { "level", "Set level (seq_level_idx)",
> +      OFFSET(level), AV_OPT_TYPE_INT,
> +      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0x1f, FLAGS, "level" },
> +
> +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
> +      { .i64 = value }, 0, 0, FLAGS, "level"
> +    { LEVEL("2.0",  0) },
> +    { LEVEL("2.1",  1) },
> +    { LEVEL("3.0",  4) },
> +    { LEVEL("3.1",  5) },
> +    { LEVEL("4.0",  8) },
> +    { LEVEL("4.1",  9) },
> +    { LEVEL("5.0", 12) },
> +    { LEVEL("5.1", 13) },
> +    { LEVEL("5.2", 14) },
> +    { LEVEL("5.3", 15) },
> +    { LEVEL("6.0", 16) },
> +    { LEVEL("6.1", 17) },
> +    { LEVEL("6.2", 18) },
> +    { LEVEL("6.3", 19) },
> +#undef LEVEL
> +
> +    { "tiles", "Tile columns x rows",
> +      OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1x1" }, 1, AV1_MAX_TILE_COLS, FLAGS },
> +    { "tile_groups", "Number of tile groups for encoding",
> +      OFFSET(tile_groups), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, AV1_MAX_TILE_ROWS * AV1_MAX_TILE_COLS, FLAGS },
> +
> +    { NULL },
> +};
> +
> +static const FFCodecDefault vaapi_encode_av1_defaults[] = {
> +    { "b",              "0"   },
> +    { "bf",             "7"   },
> +    { "g",              "120" },
> +    { "qmin",           "1"   },
> +    { "qmax",           "255" },
> +    { NULL },
> +};
> +
> +static const AVClass vaapi_encode_av1_class = {
> +    .class_name = "av1_vaapi",
> +    .item_name  = av_default_item_name,
> +    .option     = vaapi_encode_av1_options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +const FFCodec ff_av1_vaapi_encoder = {
> +    .p.name         = "av1_vaapi",
> +    CODEC_LONG_NAME("AV1 (VAAPI)"),
> +    .p.type         = AVMEDIA_TYPE_VIDEO,
> +    .p.id           = AV_CODEC_ID_AV1,
> +    .priv_data_size = sizeof(VAAPIEncodeAV1Context),
> +    .init           = &vaapi_encode_av1_init,
> +    FF_CODEC_RECEIVE_PACKET_CB(&ff_vaapi_encode_receive_packet),
> +    .close          = &vaapi_encode_av1_close,
> +    .p.priv_class   = &vaapi_encode_av1_class,
> +    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE |
> +                      AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
> +    .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
> +                      FF_CODEC_CAP_INIT_CLEANUP,
> +    .defaults       = vaapi_encode_av1_defaults,
> +    .p.pix_fmts = (const enum AVPixelFormat[]) {
> +        AV_PIX_FMT_VAAPI,
> +        AV_PIX_FMT_NONE,
> +    },
> +    .hw_configs     = ff_vaapi_encode_hw_configs,
> +    .p.wrapper_name = "vaapi",
> +};
> --
> 2.25.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Wang, Fei W July 18, 2023, 3:15 a.m. UTC | #2
On Mon, 2023-07-17 at 13:12 +0200, David Rosca wrote:
> On Mon, Jul 10, 2023 at 9:40 AM Fei Wang
> <fei.w.wang-at-intel.com@ffmpeg.org> wrote:
> > Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> > ---
> >  Changelog                     |    1 +
> >  configure                     |    3 +
> >  doc/encoders.texi             |   13 +
> >  libavcodec/Makefile           |    1 +
> >  libavcodec/allcodecs.c        |    1 +
> >  libavcodec/vaapi_encode.c     |  125 +++-
> >  libavcodec/vaapi_encode.h     |   12 +
> >  libavcodec/vaapi_encode_av1.c | 1228
> > +++++++++++++++++++++++++++++++++
> >  8 files changed, 1366 insertions(+), 18 deletions(-)
> >  create mode 100644 libavcodec/vaapi_encode_av1.c
> > 
> > diff --git a/Changelog b/Changelog
> > index 3876082844..7ae9b85d52 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -25,6 +25,7 @@ version <next>:
> >  - Raw VVC bitstream parser, muxer and demuxer
> >  - Bitstream filter for editing metadata in VVC streams
> >  - Bitstream filter for converting VVC from MP4 to Annex B
> > +- VAAPI AV1 encoder
> > 
> >  version 6.0:
> >  - Radiance HDR image support
> > diff --git a/configure b/configure
> > index 0ab0761011..6a1a30aaec 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3323,6 +3323,8 @@ av1_qsv_decoder_select="qsvdec"
> >  av1_qsv_encoder_select="qsvenc"
> >  av1_qsv_encoder_deps="libvpl"
> >  av1_amf_encoder_deps="amf"
> > +av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1"
> > +av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
> > 
> >  # parsers
> >  aac_parser_select="adts_header mpeg4audio"
> > @@ -7106,6 +7108,7 @@ if enabled vaapi; then
> >      check_type "va/va.h va/va_enc_jpeg.h"
> > "VAEncPictureParameterBufferJPEG"
> >      check_type "va/va.h
> > va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
> >      check_type "va/va.h
> > va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
> > +    check_type "va/va.h
> > va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
> >  fi
> > 
> >  if enabled_all opencl libdrm ; then
> > diff --git a/doc/encoders.texi b/doc/encoders.texi
> > index 25d6b7f09e..fb331ebd8e 100644
> > --- a/doc/encoders.texi
> > +++ b/doc/encoders.texi
> > @@ -3991,6 +3991,19 @@ Average variable bitrate.
> >  Each encoder also has its own specific options:
> >  @table @option
> > 
> > +@item av1_vaapi
> > +@option{profile} sets the value of @emph{seq_profile}.
> > +@option{tier} sets the value of @emph{seq_tier}.
> > +@option{level} sets the value of @emph{seq_level_idx}.
> > +
> > +@table @option
> > +@item tiles
> > +Set the number of tiles to encode the input video with, as columns
> > x rows.
> > +(default is 1x1).
> > +@item tile_groups
> > +Set the number of tile groups (default is 1).
> > +@end table
> > +
> >  @item h264_vaapi
> >  @option{profile} sets the value of @emph{profile_idc} and the
> > @emph{constraint_set*_flag}s.
> >  @option{level} sets the value of @emph{level_idc}.
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 3cd5997e64..fe1e6aa99d 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -259,6 +259,7 @@ OBJS-$(CONFIG_AV1_MEDIACODEC_DECODER)  +=
> > mediacodecdec.o
> >  OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER)  += mediacodecenc.o
> >  OBJS-$(CONFIG_AV1_NVENC_ENCODER)       += nvenc_av1.o nvenc.o
> >  OBJS-$(CONFIG_AV1_QSV_ENCODER)         += qsvenc_av1.o
> > +OBJS-$(CONFIG_AV1_VAAPI_ENCODER)       += vaapi_encode_av1.o
> > av1_profile_level.o
> >  OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o
> >  OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
> >  OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
> > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> > index 8775d15a4f..c43c1d7b48 100644
> > --- a/libavcodec/allcodecs.c
> > +++ b/libavcodec/allcodecs.c
> > @@ -844,6 +844,7 @@ extern const FFCodec ff_av1_nvenc_encoder;
> >  extern const FFCodec ff_av1_qsv_decoder;
> >  extern const FFCodec ff_av1_qsv_encoder;
> >  extern const FFCodec ff_av1_amf_encoder;
> > +extern const FFCodec ff_av1_vaapi_encoder;
> >  extern const FFCodec ff_libopenh264_encoder;
> >  extern const FFCodec ff_libopenh264_decoder;
> >  extern const FFCodec ff_h264_amf_encoder;
> > diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> > index 2604f12b9e..2907e159fb 100644
> > --- a/libavcodec/vaapi_encode.c
> > +++ b/libavcodec/vaapi_encode.c
> > @@ -669,6 +669,15 @@ static int
> > vaapi_encode_set_output_timestamp(AVCodecContext *avctx,
> >  {
> >      VAAPIEncodeContext *ctx = avctx->priv_data;
> > 
> > +    // AV1 packs a P frame and the next B frame into one pkt, and uses
> > a separate
> > +    // repeat frame header pkt at the display order position of
> > the P frame
> > +    // to indicate its frame index. Each frame has a corresponding
> > pkt in its
> > +    // display order position, so there is no need to consider delay for
> > the AV1 timestamp.
> > +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > +        pkt->dts = pkt->pts - ctx->dts_pts_diff;
> > +        return 0;
> > +    }
> > +
> >      if (ctx->output_delay == 0) {
> >          pkt->dts = pkt->pts;
> >      } else if (pic->encode_order < ctx->decode_delay) {
> > @@ -689,9 +698,10 @@ static int vaapi_encode_output(AVCodecContext
> > *avctx,
> >  {
> >      VAAPIEncodeContext *ctx = avctx->priv_data;
> >      VACodedBufferSegment *buf_list, *buf;
> > -    VAStatus vas;
> > +    AVPacket *pkt_ptr = pkt;
> >      int total_size = 0;
> >      uint8_t *ptr;
> > +    VAStatus vas;
> >      int err;
> > 
> >      err = vaapi_encode_wait(avctx, pic);
> > @@ -711,11 +721,52 @@ static int vaapi_encode_output(AVCodecContext
> > *avctx,
> >      for (buf = buf_list; buf; buf = buf->next)
> >          total_size += buf->size;
> > 
> > -    err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > -    ptr = pkt->data;
> > +    /** Repack the AV1 coded frame for non-displayed and repeat frames.
> > */
> > +    if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > +        int display_frame = pic->display_order <= pic-
> > >encode_order;
> > 
> > -    if (err < 0)
> > -        goto fail_mapped;
> > +        if (display_frame) {
> > +            total_size += ctx->header_data_size;
> > +            err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > +            if (err < 0)
> > +                goto fail_mapped;
> > +            ptr = pkt->data;
> > +
> > +            if (ctx->header_data_size) {
> > +                memcpy(ptr, ctx->header_data, ctx-
> > >header_data_size);
> > +                ptr += ctx->header_data_size;
> > +                ctx->header_data_size = 0;
> > +            }
> > +        } else {
> > +            ctx->header_data = av_realloc(ctx->header_data,
> > total_size);
> > +            if (!ctx->header_data) {
> > +                err = AVERROR(ENOMEM);
> > +                goto fail_mapped;
> > +            }
> > +            ptr = ctx->header_data;
> > +            ctx->header_data_size = total_size;
> > +
> > +            if (pic->tail_size) {
> > +                if (ctx->tail_pkt->size) {
> > +                    err = AVERROR(AVERROR_BUG);
> > +                    goto fail_mapped;
> > +                }
> > +
> > +                err = ff_get_encode_buffer(avctx, ctx->tail_pkt,
> > pic->tail_size, 0);
> > +                if (err < 0)
> > +                    goto fail_mapped;
> > +
> > +                memcpy(ctx->tail_pkt->data, pic->tail_data, pic-
> > >tail_size);
> > +                pkt_ptr = ctx->tail_pkt;
> > +            }
> > +        }
> > +    } else {
> > +        err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
> > +        ptr = pkt->data;
> > +
> > +        if (err < 0)
> > +            goto fail_mapped;
> > +    }
> > 
> >      for (buf = buf_list; buf; buf = buf->next) {
> >          av_log(avctx, AV_LOG_DEBUG, "Output buffer: %u bytes "
> > @@ -726,10 +777,10 @@ static int vaapi_encode_output(AVCodecContext
> > *avctx,
> >      }
> > 
> >      if (pic->type == PICTURE_TYPE_IDR)
> > -        pkt->flags |= AV_PKT_FLAG_KEY;
> > +        pkt_ptr->flags |= AV_PKT_FLAG_KEY;
> > 
> > -    pkt->pts = pic->pts;
> > -    pkt->duration = pic->duration;
> > +    pkt_ptr->pts = pic->pts;
> > +    pkt_ptr->duration = pic->duration;
> > 
> >      vas = vaUnmapBuffer(ctx->hwctx->display, pic->output_buffer);
> >      if (vas != VA_STATUS_SUCCESS) {
> > @@ -742,8 +793,8 @@ static int vaapi_encode_output(AVCodecContext
> > *avctx,
> >      // for no-delay encoders this is handled in generic codec
> >      if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
> >          avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
> > -        pkt->opaque     = pic->opaque;
> > -        pkt->opaque_ref = pic->opaque_ref;
> > +        pkt_ptr->opaque     = pic->opaque;
> > +        pkt_ptr->opaque_ref = pic->opaque_ref;
> >          pic->opaque_ref = NULL;
> >      }
> > 
> > @@ -752,6 +803,9 @@ static int vaapi_encode_output(AVCodecContext
> > *avctx,
> > 
> >      av_log(avctx, AV_LOG_DEBUG, "Output read for pic
> > %"PRId64"/%"PRId64".\n",
> >             pic->display_order, pic->encode_order);
> > +
> > +    vaapi_encode_set_output_timestamp(avctx, pic, pkt_ptr);
> > +
> >      return 0;
> > 
> >  fail_mapped:
> > @@ -1128,9 +1182,19 @@ static int
> > vaapi_encode_pick_next(AVCodecContext *avctx,
> > 
> >      vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
> >      if (pic->type != PICTURE_TYPE_IDR) {
> > -        vaapi_encode_add_ref(avctx, pic, start,
> > -                             pic->type == PICTURE_TYPE_P,
> > -                             b_counter > 0, 0);
> > +        // TODO: apply both previous and forward multi reference
> > for all vaapi encoders.
> > +        // And L0/L1 reference frame number can be set dynamically
> > through query
> > +        // VAConfigAttribEncMaxRefFrames attribute.
> > +        if (avctx->codec_id == AV_CODEC_ID_AV1) {
> > +            for (i = 0; i < ctx->nb_next_prev; i++)
> > +                vaapi_encode_add_ref(avctx, pic, ctx-
> > >next_prev[i],
> > +                                     pic->type == PICTURE_TYPE_P,
> > +                                     b_counter > 0, 0);
> > +        } else
> > +            vaapi_encode_add_ref(avctx, pic, start,
> > +                                 pic->type == PICTURE_TYPE_P,
> > +                                 b_counter > 0, 0);
> > +
> >          vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx-
> > >nb_next_prev - 1], 0, 0, 1);
> >      }
> > 
> > @@ -1292,6 +1356,19 @@ int
> > ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket
> > *pkt)
> >      AVFrame *frame = ctx->frame;
> >      int err;
> > 
> > +start:
> > +    /** If there is no B frame before the repeat P frame, send the repeat P frame
> > out. */
> > +    if (avctx->codec_id == AV_CODEC_ID_AV1 && ctx->tail_pkt->size) 
> > {
> > +        for (VAAPIEncodePicture *tmp = ctx->pic_start; tmp; tmp =
> > tmp->next) {
> > +            if (tmp->type == PICTURE_TYPE_B && tmp->pts < ctx-
> > >tail_pkt->pts)
> > +                break;
> > +            else if (!tmp->next) {
> > +                av_packet_move_ref(pkt, ctx->tail_pkt);
> > +                goto end;
> > +            }
> > +        }
> > +    }
> > +
> >      err = ff_encode_get_frame(avctx, frame);
> >      if (err < 0 && err != AVERROR_EOF)
> >          return err;
> > @@ -1356,17 +1433,21 @@ int
> > ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket
> > *pkt)
> >          return err;
> >      }
> > 
> > -    vaapi_encode_set_output_timestamp(avctx, pic, pkt);
> > -    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts
> > %"PRId64", "
> > -           "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> > -
> >      ctx->output_order = pic->encode_order;
> >      vaapi_encode_clear_old(avctx);
> > 
> > +    /** Loop until a pkt is available while flushing the encoder. */
> > +    if (ctx->end_of_stream && !pkt->size)
> > +        goto start;
> > +
> > +end:
> > +    if (pkt->size)
> > +        av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64",
> > dts %"PRId64", "
> > +               "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
> > +
> >      return 0;
> >  }
> > 
> > -
> >  static av_cold void vaapi_encode_add_global_param(AVCodecContext
> > *avctx, int type,
> >                                                    void *buffer,
> > size_t size)
> >  {
> > @@ -2667,6 +2748,12 @@ av_cold int
> > ff_vaapi_encode_init(AVCodecContext *avctx)
> >      ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
> >      ctx->hwctx = ctx->device->hwctx;
> > 
> > +    ctx->tail_pkt = av_packet_alloc();
> > +    if (!ctx->tail_pkt) {
> > +        err = AVERROR(ENOMEM);
> > +        goto fail;
> > +    }
> > +
> >      err = vaapi_encode_profile_entrypoint(avctx);
> >      if (err < 0)
> >          goto fail;
> > @@ -2859,9 +2946,11 @@ av_cold int
> > ff_vaapi_encode_close(AVCodecContext *avctx)
> >      }
> > 
> >      av_frame_free(&ctx->frame);
> > +    av_packet_free(&ctx->tail_pkt);
> > 
> >      av_freep(&ctx->codec_sequence_params);
> >      av_freep(&ctx->codec_picture_params);
> > +    av_freep(&ctx->header_data);
> >      av_fifo_freep2(&ctx->encode_fifo);
> > 
> >      av_buffer_unref(&ctx->recon_frames_ref);
> > diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> > index d5452a37b3..03df8d6d46 100644
> > --- a/libavcodec/vaapi_encode.h
> > +++ b/libavcodec/vaapi_encode.h
> > @@ -133,6 +133,11 @@ typedef struct VAAPIEncodePicture {
> > 
> >      int          nb_slices;
> >      VAAPIEncodeSlice *slices;
> > +
> > +    /** Tail data of current pic, used only for repeat header of
> > AV1. */
> > +    char tail_data[MAX_PARAM_BUFFER_SIZE];
> > +    /** Byte length of tail_data. */
> > +    size_t tail_size;
> >  } VAAPIEncodePicture;
> > 
> >  typedef struct VAAPIEncodeProfile {
> > @@ -367,6 +372,13 @@ typedef struct VAAPIEncodeContext {
> >      AVFifo          *encode_fifo;
> >      // Max number of frame buffered in encoder.
> >      int             async_depth;
> > +
> > +    /** Head data for current output pkt, used only for AV1. */
> > +    void  *header_data;
> > +    size_t header_data_size;
> > +
> > +    /** Store av1 repeat frame header pkt. */
> > +    AVPacket *tail_pkt;
> >  } VAAPIEncodeContext;
> > 
> >  enum {
> > diff --git a/libavcodec/vaapi_encode_av1.c
> > b/libavcodec/vaapi_encode_av1.c
> > new file mode 100644
> > index 0000000000..1635b25bb7
> > --- /dev/null
> > +++ b/libavcodec/vaapi_encode_av1.c
> > @@ -0,0 +1,1228 @@
> > +/*
> > + * Copyright (c) 2023 Intel Corporation
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later
> > version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General
> > Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +#include <va/va.h>
> > +#include <va/va_enc_av1.h>
> > +
> > +#include "libavutil/pixdesc.h"
> > +#include "libavutil/opt.h"
> > +
> > +#include "cbs_av1.h"
> > +#include "put_bits.h"
> > +#include "codec_internal.h"
> > +#include "av1_profile_level.h"
> > +#include "vaapi_encode.h"
> > +
> > +#define AV1_MAX_QUANT 255
> > +
> > +typedef struct VAAPIEncodeAV1Picture {
> > +    int64_t last_idr_frame;
> > +    int slot;
> > +} VAAPIEncodeAV1Picture;
> > +
> > +typedef struct VAAPIEncodeAV1Context {
> > +    VAAPIEncodeContext common;
> > +    AV1RawOBU sh; /**< sequence header.*/
> > +    AV1RawOBU fh; /**< frame header.*/
> > +    CodedBitstreamContext *cbc;
> > +    CodedBitstreamFragment current_obu;
> > +    VAConfigAttribValEncAV1 attr;
> > +    VAConfigAttribValEncAV1Ext1 attr_ext1;
> > +    VAConfigAttribValEncAV1Ext2 attr_ext2;
> > +
> > +    char sh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded sequence
> > header data. */
> > +    size_t sh_data_len; /**< bit length of sh_data. */
> > +    char fh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded frame header
> > data. */
> > +    size_t fh_data_len; /**< bit length of fh_data. */
> > +
> > +    uint8_t uniform_tile;
> > +    uint8_t use_128x128_superblock;
> > +    int sb_cols;
> > +    int sb_rows;
> > +    int tile_cols_log2;
> > +    int tile_rows_log2;
> > +    int max_tile_width_sb;
> > +    int max_tile_height_sb;
> > +    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
> > +    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
> > +
> > +    int min_log2_tile_cols;
> > +    int max_log2_tile_cols;
> > +    int min_log2_tile_rows;
> > +    int max_log2_tile_rows;
> > +
> > +    int q_idx_idr;
> > +    int q_idx_p;
> > +    int q_idx_b;
> > +
> > +    /** user options */
> > +    int profile;
> > +    int level;
> > +    int tier;
> > +    int tile_cols, tile_rows;
> > +    int tile_groups;
> > +} VAAPIEncodeAV1Context;
> > +
> > +static av_cold int vaapi_encode_av1_configure(AVCodecContext
> > *avctx)
> > +{
> > +    VAAPIEncodeContext     *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +    int ret;
> > +
> > +    ret = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    if (ctx->rc_mode->quality) {
> > +        priv->q_idx_p = av_clip(ctx->rc_quality, 0,
> > AV1_MAX_QUANT);
> > +        if (fabs(avctx->i_quant_factor) > 0.0)
> > +            priv->q_idx_idr =
> > +                av_clip((fabs(avctx->i_quant_factor) * priv-
> > >q_idx_p  +
> > +                         avctx->i_quant_offset) + 0.5,
> > +                        0, AV1_MAX_QUANT);
> > +        else
> > +            priv->q_idx_idr = priv->q_idx_p;
> > +
> > +        if (fabs(avctx->b_quant_factor) > 0.0)
> > +            priv->q_idx_b =
> > +                av_clip((fabs(avctx->b_quant_factor) * priv-
> > >q_idx_p  +
> > +                         avctx->b_quant_offset) + 0.5,
> > +                        0, AV1_MAX_QUANT);
> > +        else
> > +            priv->q_idx_b = priv->q_idx_p;
> > +    } else {
> > +        /** Arbitrary value */
> > +        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_add_obu(AVCodecContext *avctx,
> > +                                    CodedBitstreamFragment *au,
> > +                                    uint8_t type,
> > +                                    void *obu_unit)
> > +{
> > +    int ret;
> > +
> > +    ret = ff_cbs_insert_unit_content(au, -1,
> > +                                     type, obu_unit, NULL);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: "
> > +               "type = %d.\n", type);
> > +        return ret;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_write_obu(AVCodecContext *avctx,
> > +                                      char *data, size_t
> > *data_len,
> > +                                      CodedBitstreamFragment *bs)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +    int ret;
> > +
> > +    ret = ff_cbs_write_fragment_data(priv->cbc, bs);
> > +    if (ret < 0) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to write packed
> > header.\n");
> > +        return ret;
> > +    }
> > +
> > +    if ((size_t)8 * MAX_PARAM_BUFFER_SIZE < 8 * bs->data_size -
> > bs->data_bit_padding) {
> > +        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
> > +               "%zu < %zu.\n", (size_t)8 * MAX_PARAM_BUFFER_SIZE,
> > +               8 * bs->data_size - bs->data_bit_padding);
> > +        return AVERROR(ENOSPC);
> > +    }
> > +
> > +    memcpy(data, bs->data, bs->data_size);
> > +    *data_len = 8 * bs->data_size - bs->data_bit_padding;
> > +
> > +    return 0;
> > +}
> > +
> > +static int get_relative_dist(const AV1RawSequenceHeader *seq,
> > +                             unsigned int a, unsigned int b)
> > +{
> > +    unsigned int diff, m;
> > +    if (!seq->enable_order_hint)
> > +        return 0;
> > +    diff = a - b;
> > +    m = 1 << seq->order_hint_bits_minus_1;
> > +    diff = (diff & (m - 1)) - (diff & m);
> > +    return diff;
> > +}
> > +
> > +static int write_ns(PutBitContext *pbc, uint32_t n, uint32_t
> > value)
> > +{
> > +    uint32_t w, m, v, extra_bit;
> > +
> > +    w = av_log2(n) + 1;
> > +    m = (1 << w) - n;
> > +
> > +    if (value < m) {
> > +        v = value;
> > +        put_bits(pbc, w - 1, v);
> > +    } else {
> > +        v = m + ((value - m) >> 1);
> > +        extra_bit = (value - m) & 1;
> > +        put_bits(pbc, w - 1, v);
> > +        put_bits(pbc, 1, extra_bit);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/**
> > + * This API provides the minimum implementation according to the
> > currently enabled features
> > + * in the frame header. If more features are enabled in the future,
> > please make sure
> > + * the relevant flags of those features are packed correctly into
> > the frame header
> > + * OBU in this API.
> > + */
> > +static int vaapi_encode_av1_write_frame_header(AVCodecContext
> > *avctx,
> > +                                               VAAPIEncodePicture
> > *pic,
> > +                                               char *data, size_t
> > *data_len)
> > +{
> > +    VAAPIEncodeContext              *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> > +    AV1RawOBU                    *fh_obu = &priv->fh;
> > +    AV1RawOBU                    *sh_obu = &priv->sh;
> > +    AV1RawFrameHeader                *fh = &fh_obu-
> > >obu.frame.header;
> > +    AV1RawSequenceHeader             *sh = &sh_obu-
> > >obu.sequence_header;
> > +    VAEncPictureParameterBufferAV1 *vpic = pic-
> > >codec_picture_params;
> > +    PutBitContext pbc, pbc_tmp;
> > +    uint8_t byte;
> > +    int qindex, coded_lossless;
> > +    int id_len, frame_is_intra, skip_mode_allowed;
> > +    int start, payload_bits, obu_size, obu_size_len;
> > +    int qindex_offset, loopfilter_offset;
> > +    int cdef_start_offset, cdef_end_offset;
> > +    int i;
> > +
> > +    init_put_bits(&pbc, data, MAX_PARAM_BUFFER_SIZE);
> > +
> > +    /** obu header */
> > +    put_bits(&pbc, 1, fh_obu->header.obu_forbidden_bit);
> > +    put_bits(&pbc, 4, fh_obu->header.obu_type);
> > +    put_bits(&pbc, 1, fh_obu->header.obu_extension_flag);
> > +    put_bits(&pbc, 1, fh_obu->header.obu_has_size_field);
> > +    put_bits(&pbc, 1, fh_obu->header.obu_reserved_1bit);
> > +
> > +    /** record pbc status to re-write obu size later. */
> > +    if (fh_obu->header.obu_has_size_field) {
> > +        pbc_tmp = pbc;
> > +        put_bits32(&pbc, 0);
> > +        put_bits32(&pbc, 0);
> > +    }
> > +
> > +    start = put_bits_count(&pbc);
> > +
> > +    /** uncompressed_header() */
> > +    if (sh->frame_id_numbers_present_flag)
> > +        id_len = sh->additional_frame_id_length_minus_1 +
> > +                 sh->delta_frame_id_length_minus_2 + 3;
> > +
> > +    frame_is_intra = (fh->frame_type == AV1_FRAME_KEY ||
> > +                      fh->frame_type == AV1_FRAME_INTRA_ONLY);
> > +    if (!sh->reduced_still_picture_header) {
> > +        put_bits(&pbc, 1, fh->show_existing_frame);
> > +        if (fh->show_existing_frame) {
> > +            put_bits(&pbc, 3, fh->frame_to_show_map_idx);
> > +            goto trailing_bits;
> > +        }
> > +        if (sh->frame_id_numbers_present_flag)
> > +            put_bits(&pbc, id_len, fh->display_frame_id);
> > +
> > +        put_bits(&pbc, 2, fh->frame_type);
> > +        put_bits(&pbc, 1, fh->show_frame);
> > +        if (!fh->show_frame)
> > +            put_bits(&pbc, 1, fh->showable_frame);
> > +
> > +        if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> > +            (fh->frame_type == AV1_FRAME_KEY && fh->show_frame)))
> > +            put_bits(&pbc, 1, fh->error_resilient_mode);
> > +    }
> > +
> > +    put_bits(&pbc, 1, fh->disable_cdf_update);
> > +    if (sh->seq_force_screen_content_tools ==
> > AV1_SELECT_SCREEN_CONTENT_TOOLS)
> > +        put_bits(&pbc, 1, fh->allow_screen_content_tools);
> > +
> > +    if (fh->allow_screen_content_tools && sh->seq_force_integer_mv 
> > == AV1_SELECT_INTEGER_MV)
> > +        put_bits(&pbc, 1, fh->force_integer_mv);
> > +
> > +    if (sh->frame_id_numbers_present_flag)
> > +        put_bits(&pbc, id_len, fh->current_frame_id);
> > +
> > +    if (fh->frame_type != AV1_FRAME_SWITCH && !sh-
> > >reduced_still_picture_header)
> > +        put_bits(&pbc, 1, fh->frame_size_override_flag);
> > +
> > +    if (sh->enable_order_hint)
> > +        put_bits(&pbc, sh->order_hint_bits_minus_1 + 1, fh-
> > >order_hint);
> > +
> > +    if (!(frame_is_intra || fh->error_resilient_mode))
> > +        put_bits(&pbc, 3, fh->primary_ref_frame);
> > +
> > +    if (!(fh->frame_type == AV1_FRAME_SWITCH ||
> > +        fh->frame_type == AV1_FRAME_KEY && fh->show_frame))
> > +        put_bits(&pbc, 8, fh->refresh_frame_flags);
> > +
> > +    if (frame_is_intra) {
> > +        /** render_size() */
> > +         put_bits(&pbc, 1, fh->render_and_frame_size_different);
> > +    } else {
> > +        if (!frame_is_intra && sh->enable_order_hint)
> > +            put_bits(&pbc, 1, fh->frame_refs_short_signaling);
> > +
> > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +            if (!fh->frame_refs_short_signaling)
> > +                put_bits(&pbc, 3, fh->ref_frame_idx[i]);
> > +        }
> > +
> > +        if (!(fh->frame_size_override_flag && !fh-
> > >error_resilient_mode))
> > +            put_bits(&pbc, 1, fh-
> > >render_and_frame_size_different);
> > +
> > +        if (!fh->force_integer_mv)
> > +            put_bits(&pbc, 1, fh->allow_high_precision_mv);
> > +
> > +        /** read_interpolation_filter() */
> > +        put_bits(&pbc, 1, fh->is_filter_switchable);
> > +        if (!fh->is_filter_switchable)
> > +            put_bits(&pbc, 2, fh->interpolation_filter);
> > +
> > +        put_bits(&pbc, 1, fh->is_motion_mode_switchable);
> > +    }
> > +
> > +    if (!(sh->reduced_still_picture_header || fh-
> > >disable_cdf_update))
> > +        put_bits(&pbc, 1, fh->disable_frame_end_update_cdf);
> > +
> > +    /** tile_info() */
> > +    put_bits(&pbc, 1, fh->uniform_tile_spacing_flag);
> > +    if (fh->uniform_tile_spacing_flag) {
> > +        for (i = 0; i < priv->tile_cols_log2 - priv-
> > >min_log2_tile_cols; i++) {
> > +            put_bits(&pbc, 1, 1);
> > +        }
> > +        if (priv->tile_cols_log2 < priv->max_log2_tile_cols)
> > +            put_bits(&pbc, 1, 0);
> > +
> > +        for (i = 0; i < priv->tile_rows_log2 - priv-
> > >min_log2_tile_rows; i++) {
> > +            put_bits(&pbc, 1, 1);
> > +        }
> > +        if (priv->tile_rows_log2 < priv->max_log2_tile_rows)
> > +            put_bits(&pbc, 1, 0);
> > +    } else {
> > +        int start_sb = 0;
> > +        int max_width, max_height;
> > +        for (i = 0; start_sb < priv->sb_cols; i++) {
> > +            max_width = FFMIN(priv->sb_cols - start_sb, priv-
> > >max_tile_width_sb);
> > +            write_ns(&pbc, max_width, fh-
> > >width_in_sbs_minus_1[i]);
> > +            start_sb += fh->width_in_sbs_minus_1[i] + 1;
> > +        }
> > +
> > +        start_sb = 0;
> > +        for (i = 0; start_sb < priv->sb_rows; i++) {
> > +            max_height = FFMIN(priv->sb_rows - start_sb, priv-
> > >max_tile_height_sb);
> > +            write_ns(&pbc, max_height, fh-
> > >height_in_sbs_minus_1[i]);
> > +            start_sb += fh->height_in_sbs_minus_1[i] + 1;
> > +        }
> > +    }
> > +
> > +    if (priv->tile_cols_log2 || priv->tile_rows_log2) {
> > +        put_bits(&pbc, priv->tile_cols_log2 + priv-
> > >tile_rows_log2, fh->context_update_tile_id);
> > +        put_bits(&pbc, 2, fh->tile_size_bytes_minus1);
> > +    }
> > +
> > +    qindex_offset = put_bits_count(&pbc);
> > +    /** quantization_params() */
> > +    put_bits(&pbc, 8, fh->base_q_idx);
> > +    put_bits(&pbc, 1, fh->delta_q_y_dc);
> > +    put_bits(&pbc, 1, fh->delta_q_u_dc);
> > +    put_bits(&pbc, 1, fh->delta_q_u_ac);
> > +    put_bits(&pbc, 1, fh->using_qmatrix);
> > +
> > +    /** segmentation_params() */
> > +    put_bits(&pbc, 1, fh->segmentation_enabled);
> > +
> > +    /** delta_q_params() */
> > +    if (fh->base_q_idx)
> > +        put_bits(&pbc, 1, fh->delta_q_present);
> > +    if (fh->delta_q_present)
> > +        put_bits(&pbc, 2, fh->delta_q_res);
> > +
> > +    /** delta_lf_params() */
> > +    if (fh->delta_q_present) {
> > +        if (!fh->allow_intrabc)
> > +            put_bits(&pbc, 1, fh->delta_lf_present);
> > +        if (fh->delta_lf_present) {
> > +            put_bits(&pbc, 2, fh->delta_lf_res);
> > +            put_bits(&pbc, 1, fh->delta_lf_multi);
> > +        }
> > +    }
> > +
> > +    /** Compute coded_lossless */
> > +    coded_lossless = 1;
> > +    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
> > +        if (fh->segmentation_enabled && fh-
> > >feature_enabled[i][AV1_SEG_LVL_ALT_Q])
> > +            qindex = fh->base_q_idx + fh-
> > >feature_value[i][AV1_SEG_LVL_ALT_Q];
> > +        else
> > +            qindex = fh->base_q_idx;
> > +        qindex = av_clip_uintp2(qindex, 8);
> > +
> > +        if (qindex || fh->delta_q_y_dc || fh->delta_q_u_ac || fh-
> > >delta_q_u_dc ||
> > +            fh->delta_q_v_ac || fh->delta_q_v_dc)
> > +            coded_lossless = 0;
> > +    }
> > +
> > +    loopfilter_offset = put_bits_count(&pbc);
> > +    /** loop_filter_params() */
> > +    if (!(coded_lossless || fh->allow_intrabc)) {
> > +        put_bits(&pbc, 6, fh->loop_filter_level[0]);
> > +        put_bits(&pbc, 6, fh->loop_filter_level[1]);
> > +        if (fh->loop_filter_level[0] || fh->loop_filter_level[1])
> > {
> > +            put_bits(&pbc, 6, fh->loop_filter_level[2]);
> > +            put_bits(&pbc, 6, fh->loop_filter_level[3]);
> > +        }
> > +        put_bits(&pbc, 3, fh->loop_filter_sharpness);
> > +        put_bits(&pbc, 1, fh->loop_filter_delta_enabled);
> > +    }
> > +
> > +    cdef_start_offset = put_bits_count(&pbc);
> > +    /** cdef_params() */
> > +    if (!(coded_lossless || fh->allow_intrabc || !sh-
> > >enable_cdef)) {
> > +        put_bits(&pbc, 2, fh->cdef_damping_minus_3);
> > +        put_bits(&pbc, 2, fh->cdef_bits);
> > +        for (i = 0; i < (1 << fh->cdef_bits); i++) {
> > +            put_bits(&pbc, 4, fh->cdef_y_pri_strength[i]);
> > +            put_bits(&pbc, 2, fh->cdef_y_sec_strength[i]);
> > +            put_bits(&pbc, 4, fh->cdef_uv_pri_strength[i]);
> > +            put_bits(&pbc, 2, fh->cdef_uv_sec_strength[i]);
> > +        }
> > +    }
> > +    cdef_end_offset = put_bits_count(&pbc);
> > +
> > +    /** read_tx_mode() */
> > +    if (fh->tx_mode == AV1_TX_MODE_SELECT)
> > +        put_bits(&pbc, 1, 1);
> > +    else
> > +        put_bits(&pbc, 1, 0);
> > +
> > +    /** frame_reference_mode() */
> > +    if (!frame_is_intra)
> > +        put_bits(&pbc, 1, fh->reference_select);
> > +
> > +    /** skip_mode_params() */
> > +    if (frame_is_intra || !fh->reference_select || !sh-
> > >enable_order_hint)
> > +        skip_mode_allowed = 0;
> > +    else {
> > +        int forward_idx, backward_idx;
> > +        int ref_hint, forward_hint, backward_hint;
> > +
> > +        forward_idx = -1;
> > +        backward_idx = -1;
> > +
> > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +            ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
> > +            if (get_relative_dist(sh, ref_hint, fh->order_hint) <
> > 0) {
> > +                if (forward_idx < 0 || get_relative_dist(sh,
> > ref_hint, forward_idx) > 0) {
> > +                    forward_idx = i;
> > +                    forward_hint = ref_hint;
> > +                }
> > +            } else if (get_relative_dist(sh, ref_hint, fh-
> > >order_hint) > 0) {
> > +                if (backward_idx < 0 || get_relative_dist(sh,
> > ref_hint, backward_hint) < 0) {
> > +                    backward_idx = i;
> > +                    backward_hint = ref_hint;
> > +                }
> > +            }
> > +        }
> > +        if (forward_idx < 0)
> > +            skip_mode_allowed = 0;
> > +        else if (backward_idx >= 0)
> > +            skip_mode_allowed = 1;
> > +        else {
> > +            int second_forward_idx, second_forward_hint;
> > +            second_forward_idx = -1;
> > +            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +                ref_hint = fh->ref_order_hint[fh-
> > >ref_frame_idx[i]];
> > +                if (get_relative_dist(sh, ref_hint, forward_hint)
> > < 0) {
> > +                    if (second_forward_idx < 0 ||
> > get_relative_dist(sh, ref_hint, second_forward_hint) > 0){
> > +                        second_forward_idx = i;
> > +                        second_forward_hint = ref_hint;
> > +                    }
> > +                }
> > +            }
> > +            if (second_forward_idx < 0)
> > +                skip_mode_allowed = 0;
> > +            else
> > +                skip_mode_allowed = 1;
> > +        }
> > +    }
> > +
> > +    if (skip_mode_allowed)
> > +        put_bits(&pbc, 1, fh->skip_mode_present);
> > +
> > +    put_bits(&pbc, 1, fh->reduced_tx_set);
> > +
> > +    /** global_motion_params() */
> > +    if (!frame_is_intra) {
> > +        for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF;
> > i++) {
> > +            put_bits(&pbc, 1, fh->is_global[i]);
> > +            if (fh->is_global[i]) {
> > +                put_bits(&pbc, 1, fh->is_rot_zoom[i]);
> > +                if (!fh->is_rot_zoom[i])
> > +                    put_bits(&pbc, 1, fh->is_translation[i]);
> > +            }
> > +        }
> > +    }
> > +
> > +trailing_bits:
> > +    payload_bits = put_bits_count(&pbc) - start;
> > +
> > +    /** trailing_bits() */
> > +    put_bits(&pbc, 1, 1);
> > +    obu_size = (put_bits_count(&pbc) - start + 7) / 8;
> > +    for (i = 0; i < obu_size * 8 - payload_bits - 1; i++)
> > +        put_bits(&pbc, 1, 0);
> > +
> > +    flush_put_bits(&pbc);
> > +    *data_len = put_bits_count(&pbc);
> > +
> > +    /** update obu size in bitstream */
> > +    if (fh_obu->header.obu_has_size_field) {
> > +        obu_size_len = priv->attr_ext2.bits.obu_size_bytes_minus1
> > + 1;
> > +        for (i = 0; i < obu_size_len; i++) {
> > +            byte = obu_size >> (7 * i) & 0x7f;
> > +            if (i < obu_size_len - 1)
> > +                byte |= 0x80;
> > +            put_bits(&pbc_tmp, 8, byte);
> > +        }
> > +        flush_put_bits(&pbc_tmp);
> > +        memmove(pbc_tmp.buf_ptr, pbc_tmp.buf_ptr + (8 -
> > obu_size_len), obu_size);
> > +        *data_len -= (8 - obu_size_len) * 8;
> > +    }
> > +
> > +    if (fh->show_existing_frame)
> > +        return 0;
> > +
> > +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> > +        vpic->min_base_qindex = av_clip(avctx->qmin, 1,
> > AV1_MAX_QUANT);
> > +        vpic->max_base_qindex = av_clip(avctx->qmax, 1,
> > AV1_MAX_QUANT);
> > +
> > +        vpic->bit_offset_qindex            = qindex_offset - (8 -
> > obu_size_len) * 8;
> > +        vpic->bit_offset_loopfilter_params = loopfilter_offset -
> > (8 - obu_size_len) * 8;
> > +        vpic->bit_offset_cdef_params       = cdef_start_offset -
> > (8 - obu_size_len) * 8;
> > +        vpic->size_in_bits_cdef_params     = cdef_end_offset -
> > cdef_start_offset;
> > +        vpic->size_in_bits_frame_hdr_obu   = *data_len;
> > +
> > +        vpic->byte_offset_frame_hdr_obu_size = (((pic->type ==
> > PICTURE_TYPE_IDR) ?
> > +                                               priv->sh_data_len /
> > 8 : 0) +
> > +                                               (fh_obu-
> > >header.obu_extension_flag ?
> > +                                               2 : 1));
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int tile_log2(int blkSize, int target) {
> > +    int k;
> > +    for (k = 0; (blkSize << k) < target; k++);
> > +    return k;
> > +}
> > +
> > +static int vaapi_encode_av1_set_tile(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +    int mi_cols, mi_rows, sb_shift, sb_size;
> > +    int max_tile_area_sb, max_tile_area_sb_varied;
> > +    int tile_width_sb, tile_height_sb, widest_tile_sb;
> > +    int min_log2_tiles;
> > +    int tile_rows_tmp, i;
> > +
> > +    if (priv->tile_cols > AV1_MAX_TILE_COLS ||
> > +        priv->tile_rows > AV1_MAX_TILE_ROWS) {
> > +        av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d,
> > should less than %dx%d.\n",
> > +               priv->tile_cols, priv->tile_rows,
> > AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
> > +        return AVERROR(EINVAL);
> > +    }
> > +
> > +    mi_cols = 2 * ((avctx->width + 7) >> 3);
> > +    mi_rows = 2 * ((avctx->height + 7) >> 3);
> > +    priv->sb_cols = priv->use_128x128_superblock ?
> > +                    ((mi_cols + 31) >> 5) : ((mi_cols + 15) >> 4);
> > +    priv->sb_rows = priv->use_128x128_superblock ?
> > +                    ((mi_rows + 31) >> 5) : ((mi_rows + 15) >> 4);
> > +    sb_shift = priv->use_128x128_superblock ? 5 : 4;
> > +    sb_size  = sb_shift + 2;
> > +    priv->max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
> > +    max_tile_area_sb = AV1_MAX_TILE_AREA  >> (2 * sb_size);
> > +
> > +    priv->min_log2_tile_cols = tile_log2(priv->max_tile_width_sb,
> > priv->sb_cols);
> > +    priv->max_log2_tile_cols = tile_log2(1, FFMIN(priv->sb_cols,
> > AV1_MAX_TILE_COLS));
> > +    priv->max_log2_tile_rows = tile_log2(1, FFMIN(priv->sb_rows,
> > AV1_MAX_TILE_ROWS));
> > +    min_log2_tiles = FFMAX(priv->min_log2_tile_cols,
> > +                           tile_log2(max_tile_area_sb, priv-
> > >sb_rows * priv->sb_cols));
> > +
> > +    if (priv->tile_cols != av_clip(priv->tile_cols, (priv->sb_cols 
> > + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv-
> > >sb_cols)) {
> > +        priv->tile_cols = av_clip(priv->tile_cols, (priv->sb_cols
> > + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv-
> > >sb_cols);
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile cols, correct
> > to %d.\n", priv->tile_cols);
> > +    }
> > +
> > +    priv->tile_cols_log2 = tile_log2(1, priv->tile_cols);
> > +    tile_width_sb = (priv->sb_cols + (1 << priv->tile_cols_log2) -
> > 1) >>
> > +                    priv->tile_cols_log2;
> > +
> > +    if (priv->tile_rows > priv->sb_rows) {
> > +        priv->tile_rows = priv->sb_rows;
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct
> > to %d.\n", priv->tile_rows);
> > +    }
> > +
> > +    for (tile_rows_tmp = priv->tile_rows; tile_rows_tmp <= priv-
> > >sb_rows && tile_rows_tmp <= AV1_MAX_TILE_ROWS; tile_rows_tmp++) {
> > +        /** try uniformed tile. */
> > +        priv->tile_rows_log2 = tile_log2(1, tile_rows_tmp);
> > +        if ((priv->sb_cols + tile_width_sb - 1) / tile_width_sb ==
> > priv->tile_cols) {
> > +            for (i = 0; i < priv->tile_cols - 1; i++)
> > +                priv->width_in_sbs_minus_1[i] = tile_width_sb - 1;
> > +            priv->width_in_sbs_minus_1[i] = priv->sb_cols - (priv-
> > >tile_cols - 1) * tile_width_sb - 1;
> > +
> > +            tile_height_sb = (priv->sb_rows + (1 << priv-
> > >tile_rows_log2) - 1) >>
> > +                             priv->tile_rows_log2;
> > +
> > +            if ((priv->sb_rows + tile_height_sb - 1) /
> > tile_height_sb == tile_rows_tmp &&
> > +                tile_height_sb <= max_tile_area_sb /
> > tile_width_sb) {
> > +                for (i = 0; i < tile_rows_tmp - 1; i++)
> > +                    priv->height_in_sbs_minus_1[i] =
> > tile_height_sb - 1;
> > +                priv->height_in_sbs_minus_1[i] = priv->sb_rows -
> > (tile_rows_tmp - 1) * tile_height_sb - 1;
> > +
> > +                priv->uniform_tile = 1;
> > +                priv->min_log2_tile_rows = FFMAX(min_log2_tiles -
> > priv->tile_cols_log2, 0);
> > +
> > +                break;
> > +            }
> > +        }
> > +
> > +        /** try non-uniformed tile. */
> > +        widest_tile_sb = 0;
> > +        for (i = 0; i < priv->tile_cols; i++) {
> > +            priv->width_in_sbs_minus_1[i] = (i + 1) * priv-
> > >sb_cols / priv->tile_cols - i * priv->sb_cols / priv->tile_cols -
> > 1;
> > +            widest_tile_sb = FFMAX(widest_tile_sb, priv-
> > >width_in_sbs_minus_1[i] + 1);
> > +        }
> > +
> > +        if (min_log2_tiles)
> > +            max_tile_area_sb_varied = (priv->sb_rows * priv-
> > >sb_cols) >> (min_log2_tiles + 1);
> > +        else
> > +            max_tile_area_sb_varied = priv->sb_rows * priv-
> > >sb_cols;
> > +        priv->max_tile_height_sb = FFMAX(1,
> > max_tile_area_sb_varied / widest_tile_sb);
> > +
> > +        if (tile_rows_tmp == av_clip(tile_rows_tmp, (priv->sb_rows 
> > + priv->max_tile_height_sb - 1) / priv->max_tile_height_sb, priv-
> > >sb_rows)) {
> > +            for (i = 0; i < tile_rows_tmp; i++)
> > +                priv->height_in_sbs_minus_1[i] = (i + 1) * priv-
> > >sb_rows / tile_rows_tmp - i * priv->sb_rows / tile_rows_tmp - 1;
> > +
> > +            break;
> > +        }
> > +    }
> > +
> > +    if (priv->tile_rows != tile_rows_tmp) {
> > +        priv->tile_rows = tile_rows_tmp;
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct
> > to %d.\n", priv->tile_rows);
> > +    }
> > +
> > +    /** check if tile cols/rows is supported by driver. */
> > +    if (priv->attr_ext2.bits.max_tile_num_minus1) {
> > +        if ((priv->tile_cols * priv->tile_rows - 1) > priv-
> > >attr_ext2.bits.max_tile_num_minus1) {
> > +            av_log(avctx, AV_LOG_ERROR, "Unsupported tile num %d *
> > %d = %d by driver, "
> > +                   "should be less than %d.\n", priv->tile_cols,
> > priv->tile_rows,
> > +                   priv->tile_cols * priv->tile_rows,
> > +                   priv->attr_ext2.bits.max_tile_num_minus1 + 1);
> > +            return AVERROR(EINVAL);
> > +        }
> > +    }
> > +    av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to
> > %d/%d.\n",
> > +           priv->tile_cols, priv->tile_rows);
> > +
> > +    /** check if tile group numbers is valid. */
> > +    if (priv->tile_groups > priv->tile_cols * priv->tile_rows) {
> > +        av_log(avctx, AV_LOG_WARNING, "Invalid tile groups number
> > %d, "
> > +        "correct to %d.\n", priv->tile_groups, priv->tile_cols *
> > priv->tile_rows);
> > +        priv->tile_groups = priv->tile_cols * priv->tile_rows;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_write_sequence_header(AVCodecContext
> > *avctx,
> > +                                                  char *data,
> > size_t *data_len)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +
> > +    memcpy(data, &priv->sh_data, MAX_PARAM_BUFFER_SIZE *
> > sizeof(char));
> > +    *data_len = priv->sh_data_len;
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_init_sequence_params(AVCodecContext
> > *avctx)
> > +{
> > +    VAAPIEncodeContext               *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context           *priv = avctx->priv_data;
> > +    AV1RawOBU                     *sh_obu = &priv->sh;
> > +    AV1RawSequenceHeader              *sh = &sh_obu-
> > >obu.sequence_header;
> > +    VAEncSequenceParameterBufferAV1 *vseq = ctx-
> > >codec_sequence_params;
> > +    CodedBitstreamFragment           *obu = &priv->current_obu;
> > +    const AVPixFmtDescriptor *desc;
> > +    int ret;
> > +
> > +    memset(sh_obu, 0, sizeof(*sh_obu));
> > +    sh_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER;
> > +
> > +    desc = av_pix_fmt_desc_get(priv->common.input_frames-
> > >sw_format);
> > +    av_assert0(desc);
> > +
> > +    sh->seq_profile  = avctx->profile;
> > +    if (!sh->seq_force_screen_content_tools)
> > +        sh->seq_force_integer_mv = AV1_SELECT_INTEGER_MV;
> > +    sh->frame_width_bits_minus_1  = av_log2(avctx->width);
> > +    sh->frame_height_bits_minus_1 = av_log2(avctx->height);
> > +    sh->max_frame_width_minus_1   = avctx->width - 1;
> > +    sh->max_frame_height_minus_1  = avctx->height - 1;
> > +    sh->seq_tier[0]               = priv->tier;
> > +    /** enable order hint and reserve maximum 8 bits for it by
> > default. */
> > +    sh->enable_order_hint         = 1;
> > +    sh->order_hint_bits_minus_1   = 7;
> > +
> > +    sh->color_config = (AV1RawColorConfig) {
> > +        .high_bitdepth                  = desc->comp[0].depth == 8
> > ? 0 : 1,
> > +        .color_primaries                = avctx->color_primaries,
> > +        .transfer_characteristics       = avctx->color_trc,
> > +        .matrix_coefficients            = avctx->colorspace,
> > +        .color_description_present_flag = (avctx->color_primaries
> > != AVCOL_PRI_UNSPECIFIED ||
> > +                                           avctx-
> > >color_trc       != AVCOL_TRC_UNSPECIFIED ||
> > +                                           avctx-
> > >colorspace      != AVCOL_SPC_UNSPECIFIED),
> 
> This should also set color_range:
> 
>         .color_range                    = avctx->color_range == AVCOL_RANGE_JPEG,

Added in V2. Thanks.

Fei

> 
> 
> > +        .subsampling_x                  = desc->log2_chroma_w,
> > +        .subsampling_y                  = desc->log2_chroma_h,
> > +    };
> > +
> > +    if (avctx->level != FF_LEVEL_UNKNOWN) {
> > +        sh->seq_level_idx[0] = avctx->level;
> > +    } else {
> > +        const AV1LevelDescriptor *level;
> > +        float framerate;
> > +
> > +        if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
> > +            framerate = avctx->framerate.num / avctx-
> > >framerate.den;
> > +        else
> > +            framerate = 0;
> > +
> > +        level = ff_av1_guess_level(avctx->bit_rate, priv->tier,
> > +                                   ctx->surface_width, ctx-
> > >surface_height,
> > +                                   priv->tile_rows * priv-
> > >tile_cols,
> > +                                   priv->tile_cols, framerate);
> > +        if (level) {
> > +            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n",
> > level->name);
> > +            sh->seq_level_idx[0] = level->level_idx;
> > +        } else {
> > +            av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform
> > to "
> > +                   "any normal level, using level 6.3 by
> > default.\n");
> > +            sh->seq_level_idx[0] = 19;
> > +            sh->seq_tier[0] = 1;
> > +        }
> > +    }
> > +    vseq->seq_profile             = sh->seq_profile;
> > +    vseq->seq_level_idx           = sh->seq_level_idx[0];
> > +    vseq->seq_tier                = sh->seq_tier[0];
> > +    vseq->order_hint_bits_minus_1 = sh->order_hint_bits_minus_1;
> > +    vseq->intra_period            = ctx->gop_size;
> > +    vseq->ip_period               = ctx->b_per_p + 1;
> > +
> > +    vseq->seq_fields.bits.enable_order_hint = sh-
> > >enable_order_hint;
> > +
> > +    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
> > +        vseq->bits_per_second = ctx->va_bit_rate;
> > +        vseq->seq_fields.bits.enable_cdef = sh->enable_cdef = 1;
> > +    }
> > +
> > +    ret = vaapi_encode_av1_add_obu(avctx, obu,
> > AV1_OBU_SEQUENCE_HEADER, &priv->sh);
> > +    if (ret < 0)
> > +        goto end;
> > +
> > +    ret = vaapi_encode_av1_write_obu(avctx, priv->sh_data, &priv-
> > >sh_data_len, obu);
> > +    if (ret < 0)
> > +        goto end;
> > +
> > +end:
> > +    ff_cbs_fragment_reset(obu);
> > +    return ret;
> > +}
> > +
> > +static int vaapi_encode_av1_init_picture_params(AVCodecContext
> > *avctx,
> > +                                                VAAPIEncodePicture
> > *pic)
> > +{
> > +    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
> > +    VAAPIEncodeAV1Picture          *hpic = pic->priv_data;
> > +    AV1RawOBU                    *fh_obu = &priv->fh;
> > +    AV1RawFrameHeader                *fh = &fh_obu-
> > >obu.frame.header;
> > +    VAEncPictureParameterBufferAV1 *vpic = pic-
> > >codec_picture_params;
> > +    CodedBitstreamFragment          *obu = &priv->current_obu;
> > +    VAAPIEncodePicture    *ref;
> > +    VAAPIEncodeAV1Picture *href;
> > +    int slot, i;
> > +    int ret;
> > +    static const int8_t
> > default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
> > +        { 1, 0, 0, 0, -1, 0, -1, -1 };
> > +
> > +    memset(fh_obu, 0, sizeof(*fh_obu));
> > +    pic->nb_slices = priv->tile_groups;
> > +    fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> > +    fh_obu->header.obu_has_size_field = 1;
> > +
> > +    switch (pic->type) {
> > +    case PICTURE_TYPE_IDR:
> > +        av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]);
> > +        fh->frame_type = AV1_FRAME_KEY;
> > +        fh->refresh_frame_flags = 0xFF;
> > +        fh->base_q_idx = priv->q_idx_idr;
> > +        hpic->slot = 0;
> > +        hpic->last_idr_frame = pic->display_order;
> > +        break;
> > +    case PICTURE_TYPE_P:
> > +        av_assert0(pic->nb_refs[0]);
> > +        fh->frame_type = AV1_FRAME_INTER;
> > +        fh->base_q_idx = priv->q_idx_p;
> > +        ref = pic->refs[0][pic->nb_refs[0] - 1];
> > +        href = ref->priv_data;
> > +        hpic->slot = !href->slot;
> > +        hpic->last_idr_frame = href->last_idr_frame;
> > +        fh->refresh_frame_flags = 1 << hpic->slot;
> > +
> > +        /** set the nearest frame in L0 as all reference frame. */
> > +        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
> > +            fh->ref_frame_idx[i] = href->slot;
> > +        }
> > +        fh->primary_ref_frame = href->slot;
> > +        fh->ref_order_hint[href->slot] = ref->display_order -
> > href->last_idr_frame;
> > +        vpic->ref_frame_ctrl_l0.fields.search_idx0 =
> > AV1_REF_FRAME_LAST;
> > +
> > +        /** set the 2nd nearest frame in L0 as Golden frame. */
> > +        if (pic->nb_refs[0] > 1) {
> > +            ref = pic->refs[0][pic->nb_refs[0] - 2];
> > +            href = ref->priv_data;
> > +            fh->ref_frame_idx[3] = href->slot;
> > +            fh->ref_order_hint[href->slot] = ref->display_order -
> > href->last_idr_frame;
> > +            vpic->ref_frame_ctrl_l0.fields.search_idx1 =
> > AV1_REF_FRAME_GOLDEN;
> > +        }
> > +        break;
> > +    case PICTURE_TYPE_B:
> > +        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
> > +        fh->frame_type = AV1_FRAME_INTER;
> > +        fh->base_q_idx = priv->q_idx_b;
> > +        fh->refresh_frame_flags = 0x0;
> > +        fh->reference_select = 1;
> > +
> > +        /** B frame will not be referenced, disable its recon
> > frame. */
> > +        vpic->picture_flags.bits.disable_frame_recon = 1;
> > +
> > +        /** Use LAST_FRAME and BWDREF_FRAME for reference. */
> > +        vpic->ref_frame_ctrl_l0.fields.search_idx0 =
> > AV1_REF_FRAME_LAST;
> > +        vpic->ref_frame_ctrl_l1.fields.search_idx0 =
> > AV1_REF_FRAME_BWDREF;
> > +
> > +        ref                            = pic->refs[0][pic-
> > >nb_refs[0] - 1];
> > +        href                           = ref->priv_data;
> > +        hpic->last_idr_frame           = href->last_idr_frame;
> > +        fh->primary_ref_frame          = href->slot;
> > +        fh->ref_order_hint[href->slot] = ref->display_order -
> > href->last_idr_frame;
> > +        for (i = 0; i < AV1_REF_FRAME_GOLDEN; i++) {
> > +            fh->ref_frame_idx[i] = href->slot;
> > +        }
> > +
> > +        ref                            = pic->refs[1][pic-
> > >nb_refs[1] - 1];
> > +        href                           = ref->priv_data;
> > +        fh->ref_order_hint[href->slot] = ref->display_order -
> > href->last_idr_frame;
> > +        for (i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME;
> > i++) {
> > +            fh->ref_frame_idx[i] = href->slot;
> > +        }
> > +        break;
> > +    default:
> > +        av_assert0(0 && "invalid picture type");
> > +    }
> > +
> > +    fh->show_frame                = pic->display_order <= pic-
> > >encode_order;
> > +    fh->showable_frame            = fh->frame_type !=
> > AV1_FRAME_KEY;
> > +    fh->frame_width_minus_1       = avctx->width - 1;
> > +    fh->frame_height_minus_1      = avctx->height - 1;
> > +    fh->render_width_minus_1      = fh->frame_width_minus_1;
> > +    fh->render_height_minus_1     = fh->frame_height_minus_1;
> > +    fh->order_hint                = pic->display_order - hpic-
> > >last_idr_frame;
> > +    fh->tile_cols                 = priv->tile_cols;
> > +    fh->tile_rows                 = priv->tile_rows;
> > +    fh->tile_cols_log2            = priv->tile_cols_log2;
> > +    fh->tile_rows_log2            = priv->tile_rows_log2;
> > +    fh->uniform_tile_spacing_flag = priv->uniform_tile;
> > +    fh->tile_size_bytes_minus1    = priv-
> > >attr_ext2.bits.tile_size_bytes_minus1;
> > +    fh->reduced_tx_set            = 1;
> > +
> > +    /** ignore ONLY_4x4 mode for codedlossless is not fully
> > implemented. */
> > +    if (priv->attr_ext2.bits.tx_mode_support & 0x04)
> > +        fh->tx_mode = AV1_TX_MODE_SELECT;
> > +    else if (priv->attr_ext2.bits.tx_mode_support & 0x02)
> > +        fh->tx_mode = AV1_TX_MODE_LARGEST;
> > +    else
> > +        return AVERROR(EINVAL);
> > +
> > +    for (i = 0; i < fh->tile_cols; i++)
> > +        fh->width_in_sbs_minus_1[i] = vpic-
> > >width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i];
> > +
> > +    for (i = 0; i < fh->tile_rows; i++)
> > +        fh->height_in_sbs_minus_1[i] = vpic-
> > >height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i];
> > +
> > +    memcpy(fh->loop_filter_ref_deltas,
> > default_loop_filter_ref_deltas,
> > +           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
> > +
> > +    if (fh->frame_type == AV1_FRAME_KEY && fh->show_frame) {
> > +        fh->error_resilient_mode = 1;
> > +    }
> > +
> > +    if (fh->frame_type == AV1_FRAME_KEY || fh-
> > >error_resilient_mode)
> > +        fh->primary_ref_frame = AV1_PRIMARY_REF_NONE;
> > +
> > +    vpic->base_qindex          = fh->base_q_idx;
> > +    vpic->frame_width_minus_1  = fh->frame_width_minus_1;
> > +    vpic->frame_height_minus_1 = fh->frame_height_minus_1;
> > +    vpic->primary_ref_frame    = fh->primary_ref_frame;
> > +    vpic->reconstructed_frame  = pic->recon_surface;
> > +    vpic->coded_buf            = pic->output_buffer;
> > +    vpic->tile_cols            = fh->tile_cols;
> > +    vpic->tile_rows            = fh->tile_rows;
> > +    vpic->order_hint           = fh->order_hint;
> > +#if VA_CHECK_VERSION(1, 15, 0)
> > +    vpic->refresh_frame_flags  = fh->refresh_frame_flags;
> > +#endif
> > +
> > +    vpic->picture_flags.bits.enable_frame_obu     = 0;
> > +    vpic->picture_flags.bits.frame_type           = fh-
> > >frame_type;
> > +    vpic->picture_flags.bits.reduced_tx_set       = fh-
> > >reduced_tx_set;
> > +    vpic->picture_flags.bits.error_resilient_mode = fh-
> > >error_resilient_mode;
> > +
> > +    /** let driver decide to use single or compound reference
> > prediction mode. */
> > +    vpic->mode_control_flags.bits.reference_mode = fh-
> > >reference_select ? 2 : 0;
> > +    vpic->mode_control_flags.bits.tx_mode = fh->tx_mode;
> > +
> > +    vpic->tile_group_obu_hdr_info.bits.obu_has_size_field = 1;
> > +
> > +    /** set reference. */
> > +    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
> > +        vpic->ref_frame_idx[i] = fh->ref_frame_idx[i];
> > +
> > +    for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
> > +        vpic->reference_frames[i] = VA_INVALID_SURFACE;
> > +
> > +    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> > +        for (int j = 0; j < pic->nb_refs[i]; j++) {
> > +            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
> > +
> > +            slot = ((VAAPIEncodeAV1Picture*)ref_pic->priv_data)-
> > >slot;
> > +            av_assert0(vpic->reference_frames[slot] ==
> > VA_INVALID_SURFACE);
> > +
> > +            vpic->reference_frames[slot] = ref_pic->recon_surface;
> > +        }
> > +    }
> > +
> > +    /** pack frame header, and set va params offset like
> > bit_offset_qindex etc. */
> > +    ret = vaapi_encode_av1_write_frame_header(avctx, pic, priv-
> > >fh_data, &priv->fh_data_len);
> > +    if (ret < 0)
> > +        goto end;
> > +
> > +end:
> > +    ff_cbs_fragment_reset(obu);
> > +    return ret;
> > +}
> > +
> > +static int vaapi_encode_av1_init_slice_params(AVCodecContext
> > *avctx,
> > +                                              VAAPIEncodePicture
> > *pic,
> > +                                              VAAPIEncodeSlice
> > *slice)
> > +{
> > +    VAAPIEncodeAV1Context      *priv = avctx->priv_data;
> > +    VAEncTileGroupBufferAV1  *vslice = slice->codec_slice_params;
> > +    CodedBitstreamAV1Context  *cbctx = priv->cbc->priv_data;
> > +    int div;
> > +
> > +    /** Set tile group info. */
> > +    div = priv->tile_cols * priv->tile_rows / priv->tile_groups;
> > +    vslice->tg_start = slice->index * div;
> > +    if (slice->index == (priv->tile_groups - 1)) {
> > +        vslice->tg_end = priv->tile_cols * priv->tile_rows - 1;
> > +        cbctx->seen_frame_header = 0;
> > +    } else {
> > +        vslice->tg_end = (slice->index + 1) * div - 1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int vaapi_encode_av1_write_picture_header(AVCodecContext
> > *avctx,
> > +                                                 VAAPIEncodePictur
> > e *pic,
> > +                                                 char *data,
> > size_t *data_len)
> > +{
> > +    VAAPIEncodeAV1Context     *priv = avctx->priv_data;
> > +    CodedBitstreamFragment     *obu = &priv->current_obu;
> > +    AV1RawOBU               *fh_obu = &priv->fh;
> > +    AV1RawFrameHeader       *rep_fh = &fh_obu->obu.frame_header;
> > +    VAAPIEncodeAV1Picture *href;
> > +    int ret = 0;
> > +
> > +    pic->tail_size = 0;
> > +    /** Pack repeat frame header. */
> > +    if (pic->display_order > pic->encode_order) {
> > +        memset(fh_obu, 0, sizeof(*fh_obu));
> > +        href = pic->refs[0][pic->nb_refs[0] - 1]->priv_data;
> > +        fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
> > +        fh_obu->header.obu_has_size_field = 1;
> > +
> > +        rep_fh->show_existing_frame   = 1;
> > +        rep_fh->frame_to_show_map_idx = href->slot == 0;
> > +        rep_fh->frame_type            = AV1_FRAME_INTER;
> > +        rep_fh->frame_width_minus_1   = avctx->width - 1;
> > +        rep_fh->frame_height_minus_1  = avctx->height - 1;
> > +        rep_fh->render_width_minus_1  = rep_fh-
> > >frame_width_minus_1;
> > +        rep_fh->render_height_minus_1 = rep_fh-
> > >frame_height_minus_1;
> > +
> > +        ret = vaapi_encode_av1_write_frame_header(avctx, pic, pic-
> > >tail_data, &pic->tail_size);
> > +        if (ret < 0)
> > +            goto end;
> > +
> > +        pic->tail_size /= 8;
> > +    }
> > +
> > +    memcpy(data, &priv->fh_data, MAX_PARAM_BUFFER_SIZE *
> > sizeof(char));
> > +    *data_len = priv->fh_data_len;
> > +
> > +end:
> > +    ff_cbs_fragment_reset(obu);
> > +    return ret;
> > +}
> > +
> > +static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
> > +    { FF_PROFILE_AV1_MAIN,  8, 3, 1, 1, VAProfileAV1Profile0 },
> > +    { FF_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
> > +    { FF_PROFILE_UNKNOWN }
> > +};
> > +
> > +static const VAAPIEncodeType vaapi_encode_type_av1 = {
> > +    .profiles        = vaapi_encode_av1_profiles,
> > +    .flags           = FLAG_B_PICTURES,
> > +    .default_quality = 25,
> > +    .configure       = &vaapi_encode_av1_configure,
> > +
> > +    .sequence_header_type  = VAEncPackedHeaderSequence,
> > +    .sequence_params_size  =
> > sizeof(VAEncSequenceParameterBufferAV1),
> > +    .init_sequence_params  =
> > &vaapi_encode_av1_init_sequence_params,
> > +    .write_sequence_header =
> > &vaapi_encode_av1_write_sequence_header,
> > +
> > +    .picture_priv_data_size = sizeof(VAAPIEncodeAV1Picture),
> > +    .picture_header_type    = VAEncPackedHeaderPicture,
> > +    .picture_params_size    =
> > sizeof(VAEncPictureParameterBufferAV1),
> > +    .init_picture_params    =
> > &vaapi_encode_av1_init_picture_params,
> > +    .write_picture_header   =
> > &vaapi_encode_av1_write_picture_header,
> > +
> > +    .slice_params_size = sizeof(VAEncTileGroupBufferAV1),
> > +    .init_slice_params = &vaapi_encode_av1_init_slice_params,
> > +};
> > +
> > +static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeContext      *ctx = avctx->priv_data;
> > +    VAAPIEncodeAV1Context  *priv = avctx->priv_data;
> > +    VAConfigAttrib attr;
> > +    VAStatus vas;
> > +    int ret;
> > +
> > +    ctx->codec = &vaapi_encode_type_av1;
> > +
> > +    ctx->desired_packed_headers =
> > +        VA_ENC_PACKED_HEADER_SEQUENCE |
> > +        VA_ENC_PACKED_HEADER_PICTURE;
> > +
> > +    if (avctx->profile == FF_PROFILE_UNKNOWN)
> > +        avctx->profile = priv->profile;
> > +    if (avctx->level == FF_LEVEL_UNKNOWN)
> > +        avctx->level = priv->level;
> > +
> > +    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0x1f)
> > {
> > +        av_log(avctx, AV_LOG_ERROR, "Invalid level %d\n", avctx-
> > >level);
> > +        return AVERROR(EINVAL);
> > +    }
> > +
> > +    ret = ff_vaapi_encode_init(avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    attr.type = VAConfigAttribEncAV1;
> > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > +                                ctx->va_profile,
> > +                                ctx->va_entrypoint,
> > +                                &attr, 1);
> > +    if (vas != VA_STATUS_SUCCESS) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > +               "config attribute: %d (%s).\n", vas,
> > vaErrorStr(vas));
> > +        return AVERROR_EXTERNAL;
> > +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> > +        priv->attr.value = 0;
> > +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> > +               "supported.\n", attr.type);
> > +    } else {
> > +        priv->attr.value = attr.value;
> > +    }
> > +
> > +    attr.type = VAConfigAttribEncAV1Ext1;
> > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > +                                ctx->va_profile,
> > +                                ctx->va_entrypoint,
> > +                                &attr, 1);
> > +    if (vas != VA_STATUS_SUCCESS) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > +               "config attribute: %d (%s).\n", vas,
> > vaErrorStr(vas));
> > +        return AVERROR_EXTERNAL;
> > +    } else if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
> > +        priv->attr_ext1.value = 0;
> > +        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
> > +               "supported.\n", attr.type);
> > +    } else {
> > +        priv->attr_ext1.value = attr.value;
> > +    }
> > +
> > +    /** This attr provides essential indicators, return error if
> > not support. */
> > +    attr.type = VAConfigAttribEncAV1Ext2;
> > +    vas = vaGetConfigAttributes(ctx->hwctx->display,
> > +                                ctx->va_profile,
> > +                                ctx->va_entrypoint,
> > +                                &attr, 1);
> > +    if (vas != VA_STATUS_SUCCESS || attr.value ==
> > VA_ATTRIB_NOT_SUPPORTED) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to query "
> > +               "config attribute: %d (%s).\n", vas,
> > vaErrorStr(vas));
> > +        return AVERROR_EXTERNAL;
> > +    } else {
> > +        priv->attr_ext2.value = attr.value;
> > +    }
> > +
> > +    ret = vaapi_encode_av1_set_tile(avctx);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    return 0;
> > +}
> > +
> > +static av_cold int vaapi_encode_av1_close(AVCodecContext *avctx)
> > +{
> > +    VAAPIEncodeAV1Context *priv = avctx->priv_data;
> > +
> > +    ff_cbs_fragment_free(&priv->current_obu);
> > +    ff_cbs_close(&priv->cbc);
> > +
> > +    return ff_vaapi_encode_close(avctx);
> > +}
> > +
> > +#define OFFSET(x) offsetof(VAAPIEncodeAV1Context, x)
> > +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM |
> > AV_OPT_FLAG_ENCODING_PARAM)
> > +
> > +static const AVOption vaapi_encode_av1_options[] = {
> > +    VAAPI_ENCODE_COMMON_OPTIONS,
> > +    VAAPI_ENCODE_RC_OPTIONS,
> > +    { "profile", "Set profile (seq_profile)",
> > +      OFFSET(profile), AV_OPT_TYPE_INT,
> > +      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff,
> > FLAGS, "profile" },
> > +
> > +#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
> > +    { .i64 = value }, 0, 0, FLAGS, "profile"
> > +    { PROFILE("main",               FF_PROFILE_AV1_MAIN) },
> > +    { PROFILE("high",               FF_PROFILE_AV1_HIGH) },
> > +    { PROFILE("professional",       FF_PROFILE_AV1_PROFESSIONAL)
> > },
> > +#undef PROFILE
> > +
> > +    { "tier", "Set tier (seq_tier)",
> > +      OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS,
> > "tier" },
> > +    { "main", NULL, 0, AV_OPT_TYPE_CONST,
> > +      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
> > +    { "high", NULL, 0, AV_OPT_TYPE_CONST,
> > +      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
> > +    { "level", "Set level (seq_level_idx)",
> > +      OFFSET(level), AV_OPT_TYPE_INT,
> > +      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0x1f, FLAGS,
> > "level" },
> > +
> > +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
> > +      { .i64 = value }, 0, 0, FLAGS, "level"
> > +    { LEVEL("2.0",  0) },
> > +    { LEVEL("2.1",  1) },
> > +    { LEVEL("3.0",  4) },
> > +    { LEVEL("3.1",  5) },
> > +    { LEVEL("4.0",  8) },
> > +    { LEVEL("4.1",  9) },
> > +    { LEVEL("5.0", 12) },
> > +    { LEVEL("5.1", 13) },
> > +    { LEVEL("5.2", 14) },
> > +    { LEVEL("5.3", 15) },
> > +    { LEVEL("6.0", 16) },
> > +    { LEVEL("6.1", 17) },
> > +    { LEVEL("6.2", 18) },
> > +    { LEVEL("6.3", 19) },
> > +#undef LEVEL
> > +
> > +    { "tiles", "Tile columns x rows",
> > +      OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1x1" },
> > 1, AV1_MAX_TILE_COLS, FLAGS },
> > +    { "tile_groups", "Number of tile groups for encoding",
> > +      OFFSET(tile_groups), AV_OPT_TYPE_INT, { .i64 = 1 }, 1,
> > AV1_MAX_TILE_ROWS * AV1_MAX_TILE_COLS, FLAGS },
> > +
> > +    { NULL },
> > +};
> > +
> > +static const FFCodecDefault vaapi_encode_av1_defaults[] = {
> > +    { "b",              "0"   },
> > +    { "bf",             "7"   },
> > +    { "g",              "120" },
> > +    { "qmin",           "1"   },
> > +    { "qmax",           "255" },
> > +    { NULL },
> > +};
> > +
> > +static const AVClass vaapi_encode_av1_class = {
> > +    .class_name = "av1_vaapi",
> > +    .item_name  = av_default_item_name,
> > +    .option     = vaapi_encode_av1_options,
> > +    .version    = LIBAVUTIL_VERSION_INT,
> > +};
> > +
> > +const FFCodec ff_av1_vaapi_encoder = {
> > +    .p.name         = "av1_vaapi",
> > +    CODEC_LONG_NAME("AV1 (VAAPI)"),
> > +    .p.type         = AVMEDIA_TYPE_VIDEO,
> > +    .p.id           = AV_CODEC_ID_AV1,
> > +    .priv_data_size = sizeof(VAAPIEncodeAV1Context),
> > +    .init           = &vaapi_encode_av1_init,
> > +    FF_CODEC_RECEIVE_PACKET_CB(&ff_vaapi_encode_receive_packet),
> > +    .close          = &vaapi_encode_av1_close,
> > +    .p.priv_class   = &vaapi_encode_av1_class,
> > +    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE |
> > +                      AV_CODEC_CAP_DR1 |
> > AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
> > +    .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE |
> > +                      FF_CODEC_CAP_INIT_CLEANUP,
> > +    .defaults       = vaapi_encode_av1_defaults,
> > +    .p.pix_fmts = (const enum AVPixelFormat[]) {
> > +        AV_PIX_FMT_VAAPI,
> > +        AV_PIX_FMT_NONE,
> > +    },
> > +    .hw_configs     = ff_vaapi_encode_hw_configs,
> > +    .p.wrapper_name = "vaapi",
> > +};
> > --
> > 2.25.1
> > 
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > 
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index 3876082844..7ae9b85d52 100644
--- a/Changelog
+++ b/Changelog
@@ -25,6 +25,7 @@  version <next>:
 - Raw VVC bitstream parser, muxer and demuxer
 - Bitstream filter for editing metadata in VVC streams
 - Bitstream filter for converting VVC from MP4 to Annex B
+- VAAPI AV1 encoder
 
 version 6.0:
 - Radiance HDR image support
diff --git a/configure b/configure
index 0ab0761011..6a1a30aaec 100755
--- a/configure
+++ b/configure
@@ -3323,6 +3323,8 @@  av1_qsv_decoder_select="qsvdec"
 av1_qsv_encoder_select="qsvenc"
 av1_qsv_encoder_deps="libvpl"
 av1_amf_encoder_deps="amf"
+av1_vaapi_encoder_deps="VAEncPictureParameterBufferAV1"
+av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
 
 # parsers
 aac_parser_select="adts_header mpeg4audio"
@@ -7106,6 +7108,7 @@  if enabled vaapi; then
     check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
     check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
     check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
+    check_type "va/va.h va/va_enc_av1.h"  "VAEncPictureParameterBufferAV1"
 fi
 
 if enabled_all opencl libdrm ; then
diff --git a/doc/encoders.texi b/doc/encoders.texi
index 25d6b7f09e..fb331ebd8e 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3991,6 +3991,19 @@  Average variable bitrate.
 Each encoder also has its own specific options:
 @table @option
 
+@item av1_vaapi
+@option{profile} sets the value of @emph{seq_profile}.
+@option{tier} sets the value of @emph{seq_tier}.
+@option{level} sets the value of @emph{seq_level_idx}.
+
+@table @option
+@item tiles
+Set the number of tiles to encode the input video with, as columns x rows
+(default is 1x1).
+@item tile_groups
+Set the number of tile groups (default is 1).
+@end table
+
 @item h264_vaapi
 @option{profile} sets the value of @emph{profile_idc} and the @emph{constraint_set*_flag}s.
 @option{level} sets the value of @emph{level_idc}.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3cd5997e64..fe1e6aa99d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -259,6 +259,7 @@  OBJS-$(CONFIG_AV1_MEDIACODEC_DECODER)  += mediacodecdec.o
 OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER)  += mediacodecenc.o
 OBJS-$(CONFIG_AV1_NVENC_ENCODER)       += nvenc_av1.o nvenc.o
 OBJS-$(CONFIG_AV1_QSV_ENCODER)         += qsvenc_av1.o
+OBJS-$(CONFIG_AV1_VAAPI_ENCODER)       += vaapi_encode_av1.o av1_profile_level.o
 OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o
 OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
 OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 8775d15a4f..c43c1d7b48 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -844,6 +844,7 @@  extern const FFCodec ff_av1_nvenc_encoder;
 extern const FFCodec ff_av1_qsv_decoder;
 extern const FFCodec ff_av1_qsv_encoder;
 extern const FFCodec ff_av1_amf_encoder;
+extern const FFCodec ff_av1_vaapi_encoder;
 extern const FFCodec ff_libopenh264_encoder;
 extern const FFCodec ff_libopenh264_decoder;
 extern const FFCodec ff_h264_amf_encoder;
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 2604f12b9e..2907e159fb 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -669,6 +669,15 @@  static int vaapi_encode_set_output_timestamp(AVCodecContext *avctx,
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
 
+    // AV1 packs a P frame and the following B frame into one packet, and emits
+    // a separate repeat-frame-header packet at the P frame's display-order
+    // position to indicate its frame index. Every frame therefore has a packet
+    // at its display-order position, so no delay needs to be considered here.
+    if (avctx->codec_id == AV_CODEC_ID_AV1) {
+        pkt->dts = pkt->pts - ctx->dts_pts_diff;
+        return 0;
+    }
+
     if (ctx->output_delay == 0) {
         pkt->dts = pkt->pts;
     } else if (pic->encode_order < ctx->decode_delay) {
@@ -689,9 +698,10 @@  static int vaapi_encode_output(AVCodecContext *avctx,
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
     VACodedBufferSegment *buf_list, *buf;
-    VAStatus vas;
+    AVPacket *pkt_ptr = pkt;
     int total_size = 0;
     uint8_t *ptr;
+    VAStatus vas;
     int err;
 
     err = vaapi_encode_wait(avctx, pic);
@@ -711,11 +721,52 @@  static int vaapi_encode_output(AVCodecContext *avctx,
     for (buf = buf_list; buf; buf = buf->next)
         total_size += buf->size;
 
-    err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
-    ptr = pkt->data;
+    /** Repack the AV1 coded frame for non-displayed and repeat frames. */
+    if (avctx->codec_id == AV_CODEC_ID_AV1) {
+        int display_frame = pic->display_order <= pic->encode_order;
 
-    if (err < 0)
-        goto fail_mapped;
+        if (display_frame) {
+            total_size += ctx->header_data_size;
+            err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
+            if (err < 0)
+                goto fail_mapped;
+            ptr = pkt->data;
+
+            if (ctx->header_data_size) {
+                memcpy(ptr, ctx->header_data, ctx->header_data_size);
+                ptr += ctx->header_data_size;
+                ctx->header_data_size = 0;
+            }
+        } else {
+            ctx->header_data = av_realloc(ctx->header_data, total_size);
+            if (!ctx->header_data) {
+                err = AVERROR(ENOMEM);
+                goto fail_mapped;
+            }
+            ptr = ctx->header_data;
+            ctx->header_data_size = total_size;
+
+            if (pic->tail_size) {
+                if (ctx->tail_pkt->size) {
+                    err = AVERROR(AVERROR_BUG);
+                    goto fail_mapped;
+                }
+
+                err = ff_get_encode_buffer(avctx, ctx->tail_pkt, pic->tail_size, 0);
+                if (err < 0)
+                    goto fail_mapped;
+
+                memcpy(ctx->tail_pkt->data, pic->tail_data, pic->tail_size);
+                pkt_ptr = ctx->tail_pkt;
+            }
+        }
+    } else {
+        err = ff_get_encode_buffer(avctx, pkt, total_size, 0);
+        ptr = pkt->data;
+
+        if (err < 0)
+            goto fail_mapped;
+    }
 
     for (buf = buf_list; buf; buf = buf->next) {
         av_log(avctx, AV_LOG_DEBUG, "Output buffer: %u bytes "
@@ -726,10 +777,10 @@  static int vaapi_encode_output(AVCodecContext *avctx,
     }
 
     if (pic->type == PICTURE_TYPE_IDR)
-        pkt->flags |= AV_PKT_FLAG_KEY;
+        pkt_ptr->flags |= AV_PKT_FLAG_KEY;
 
-    pkt->pts = pic->pts;
-    pkt->duration = pic->duration;
+    pkt_ptr->pts = pic->pts;
+    pkt_ptr->duration = pic->duration;
 
     vas = vaUnmapBuffer(ctx->hwctx->display, pic->output_buffer);
     if (vas != VA_STATUS_SUCCESS) {
@@ -742,8 +793,8 @@  static int vaapi_encode_output(AVCodecContext *avctx,
     // for no-delay encoders this is handled in generic codec
     if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY &&
         avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
-        pkt->opaque     = pic->opaque;
-        pkt->opaque_ref = pic->opaque_ref;
+        pkt_ptr->opaque     = pic->opaque;
+        pkt_ptr->opaque_ref = pic->opaque_ref;
         pic->opaque_ref = NULL;
     }
 
@@ -752,6 +803,9 @@  static int vaapi_encode_output(AVCodecContext *avctx,
 
     av_log(avctx, AV_LOG_DEBUG, "Output read for pic %"PRId64"/%"PRId64".\n",
            pic->display_order, pic->encode_order);
+
+    vaapi_encode_set_output_timestamp(avctx, pic, pkt_ptr);
+
     return 0;
 
 fail_mapped:
@@ -1128,9 +1182,19 @@  static int vaapi_encode_pick_next(AVCodecContext *avctx,
 
     vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
     if (pic->type != PICTURE_TYPE_IDR) {
-        vaapi_encode_add_ref(avctx, pic, start,
-                             pic->type == PICTURE_TYPE_P,
-                             b_counter > 0, 0);
+        // TODO: apply both previous and forward multi-reference to all VAAPI encoders.
+        // The L0/L1 reference frame counts can then be set dynamically by querying
+        // the VAConfigAttribEncMaxRefFrames attribute.
+        if (avctx->codec_id == AV_CODEC_ID_AV1) {
+            for (i = 0; i < ctx->nb_next_prev; i++)
+                vaapi_encode_add_ref(avctx, pic, ctx->next_prev[i],
+                                     pic->type == PICTURE_TYPE_P,
+                                     b_counter > 0, 0);
+        } else
+            vaapi_encode_add_ref(avctx, pic, start,
+                                 pic->type == PICTURE_TYPE_P,
+                                 b_counter > 0, 0);
+
         vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx->nb_next_prev - 1], 0, 0, 1);
     }
 
@@ -1292,6 +1356,19 @@  int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
     AVFrame *frame = ctx->frame;
     int err;
 
+start:
+    /** If no B frame precedes the repeat P frame, send the repeat P frame out. */
+    if (avctx->codec_id == AV_CODEC_ID_AV1 && ctx->tail_pkt->size) {
+        for (VAAPIEncodePicture *tmp = ctx->pic_start; tmp; tmp = tmp->next) {
+            if (tmp->type == PICTURE_TYPE_B && tmp->pts < ctx->tail_pkt->pts)
+                break;
+            else if (!tmp->next) {
+                av_packet_move_ref(pkt, ctx->tail_pkt);
+                goto end;
+            }
+        }
+    }
+
     err = ff_encode_get_frame(avctx, frame);
     if (err < 0 && err != AVERROR_EOF)
         return err;
@@ -1356,17 +1433,21 @@  int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
         return err;
     }
 
-    vaapi_encode_set_output_timestamp(avctx, pic, pkt);
-    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
-           "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
-
     ctx->output_order = pic->encode_order;
     vaapi_encode_clear_old(avctx);
 
+    /** While flushing the encoder, loop until a packet is available. */
+    if (ctx->end_of_stream && !pkt->size)
+        goto start;
+
+end:
+    if (pkt->size)
+        av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64", dts %"PRId64", "
+               "size %u bytes.\n", pkt->pts, pkt->dts, pkt->size);
+
     return 0;
 }
 
-
 static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx, int type,
                                                   void *buffer, size_t size)
 {
@@ -2667,6 +2748,12 @@  av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
     ctx->hwctx = ctx->device->hwctx;
 
+    ctx->tail_pkt = av_packet_alloc();
+    if (!ctx->tail_pkt) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
     err = vaapi_encode_profile_entrypoint(avctx);
     if (err < 0)
         goto fail;
@@ -2859,9 +2946,11 @@  av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
     }
 
     av_frame_free(&ctx->frame);
+    av_packet_free(&ctx->tail_pkt);
 
     av_freep(&ctx->codec_sequence_params);
     av_freep(&ctx->codec_picture_params);
+    av_freep(&ctx->header_data);
     av_fifo_freep2(&ctx->encode_fifo);
 
     av_buffer_unref(&ctx->recon_frames_ref);
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index d5452a37b3..03df8d6d46 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -133,6 +133,11 @@  typedef struct VAAPIEncodePicture {
 
     int          nb_slices;
     VAAPIEncodeSlice *slices;
+
+    /** Tail data of current pic, used only for repeat header of AV1. */
+    char tail_data[MAX_PARAM_BUFFER_SIZE];
+    /** Byte length of tail_data. */
+    size_t tail_size;
 } VAAPIEncodePicture;
 
 typedef struct VAAPIEncodeProfile {
@@ -367,6 +372,13 @@  typedef struct VAAPIEncodeContext {
     AVFifo          *encode_fifo;
     // Max number of frame buffered in encoder.
     int             async_depth;
+
+    /** Header data for the current output packet, used only for AV1. */
+    void  *header_data;
+    size_t header_data_size;
+
+    /** Store av1 repeat frame header pkt. */
+    AVPacket *tail_pkt;
 } VAAPIEncodeContext;
 
 enum {
diff --git a/libavcodec/vaapi_encode_av1.c b/libavcodec/vaapi_encode_av1.c
new file mode 100644
index 0000000000..1635b25bb7
--- /dev/null
+++ b/libavcodec/vaapi_encode_av1.c
@@ -0,0 +1,1228 @@ 
+/*
+ * Copyright (c) 2023 Intel Corporation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+#include <va/va_enc_av1.h>
+
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+
+#include "cbs_av1.h"
+#include "put_bits.h"
+#include "codec_internal.h"
+#include "av1_profile_level.h"
+#include "vaapi_encode.h"
+
+#define AV1_MAX_QUANT 255
+
+/**
+ * AV1-specific per-picture data.
+ *
+ * NOTE(review): the code that fills these fields is outside this part of the
+ * patch; the field notes below are inferred from the names only.
+ */
+typedef struct VAAPIEncodeAV1Picture {
+    int64_t last_idr_frame; /* presumably identifies the most recent IDR picture -- TODO confirm */
+    int slot;               /* presumably a reference-buffer slot index -- TODO confirm */
+} VAAPIEncodeAV1Picture;
+
+/**
+ * Private context of the AV1 VAAPI encoder.
+ *
+ * avctx->priv_data is read both as VAAPIEncodeContext and as
+ * VAAPIEncodeAV1Context, so `common` has to stay the first member.
+ */
+typedef struct VAAPIEncodeAV1Context {
+    VAAPIEncodeContext common;
+    AV1RawOBU sh; /**< sequence header.*/
+    AV1RawOBU fh; /**< frame header.*/
+    /** CBS context; created in vaapi_encode_av1_configure(). */
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_obu;
+    /** Driver attribute values queried in vaapi_encode_av1_init(). */
+    VAConfigAttribValEncAV1 attr;
+    VAConfigAttribValEncAV1Ext1 attr_ext1;
+    VAConfigAttribValEncAV1Ext2 attr_ext2;
+
+    char sh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded sequence header data. */
+    size_t sh_data_len; /**< bit length of sh_data. */
+    char fh_data[MAX_PARAM_BUFFER_SIZE]; /**< coded frame header data. */
+    size_t fh_data_len; /**< bit length of fh_data. */
+
+    uint8_t uniform_tile;
+    uint8_t use_128x128_superblock;
+    /** Superblock counts and tile limits; read when tile_info() is written. */
+    int sb_cols;
+    int sb_rows;
+    int tile_cols_log2;
+    int tile_rows_log2;
+    int max_tile_width_sb;
+    int max_tile_height_sb;
+    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
+    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
+
+    int min_log2_tile_cols;
+    int max_log2_tile_cols;
+    int min_log2_tile_rows;
+    int max_log2_tile_rows;
+
+    /** Quantizer indices per picture type; set in vaapi_encode_av1_configure(). */
+    int q_idx_idr;
+    int q_idx_p;
+    int q_idx_b;
+
+    /** user options */
+    int profile;
+    int level;
+    int tier;
+    /**
+     * Both filled by the single "tiles" option, an AV_OPT_TYPE_IMAGE_SIZE
+     * stored at OFFSET(tile_cols): keep the two ints adjacent, in this order.
+     */
+    int tile_cols, tile_rows;
+    int tile_groups;
+} VAAPIEncodeAV1Context;
+
+/**
+ * Create the CBS context and choose the quantizer indices.
+ *
+ * When ctx->rc_mode->quality is set, the P index is rc_quality clipped to
+ * [0, AV1_MAX_QUANT]. The IDR and B indices are derived from the P index with
+ * the magnitude of i_quant_factor / b_quant_factor plus the matching offset,
+ * or are equal to the P index when the factor is zero. Otherwise all three
+ * indices are set to 128.
+ *
+ * @return 0 on success, or the negative error code from ff_cbs_init().
+ */
+static av_cold int vaapi_encode_av1_configure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext     *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+    int ret;
+
+    ret = ff_cbs_init(&priv->cbc, AV_CODEC_ID_AV1, avctx);
+    if (ret < 0)
+        return ret;
+
+    if (ctx->rc_mode->quality) {
+        priv->q_idx_p = av_clip(ctx->rc_quality, 0, AV1_MAX_QUANT);
+        /* Only the magnitude of the factor is used; + 0.5 is added before
+         * the result is clipped to the valid index range. */
+        if (fabs(avctx->i_quant_factor) > 0.0)
+            priv->q_idx_idr =
+                av_clip((fabs(avctx->i_quant_factor) * priv->q_idx_p  +
+                         avctx->i_quant_offset) + 0.5,
+                        0, AV1_MAX_QUANT);
+        else
+            priv->q_idx_idr = priv->q_idx_p;
+
+        if (fabs(avctx->b_quant_factor) > 0.0)
+            priv->q_idx_b =
+                av_clip((fabs(avctx->b_quant_factor) * priv->q_idx_p  +
+                         avctx->b_quant_offset) + 0.5,
+                        0, AV1_MAX_QUANT);
+        else
+            priv->q_idx_b = priv->q_idx_p;
+    } else {
+        /** Arbitrary value */
+        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 128;
+    }
+
+    return 0;
+}
+
+/**
+ * Add one OBU to a CBS fragment.
+ *
+ * @param avctx    logging context
+ * @param au       fragment that receives the unit
+ * @param type     OBU type of the new unit
+ * @param obu_unit content of the unit
+ * @return 0 on success, otherwise the negative error code returned by
+ *         ff_cbs_insert_unit_content()
+ */
+static int vaapi_encode_av1_add_obu(AVCodecContext *avctx,
+                                    CodedBitstreamFragment *au,
+                                    uint8_t type,
+                                    void *obu_unit)
+{
+    int ret;
+
+    /* Position -1 appends at the end of the fragment. No content buffer is
+     * passed (NULL); NOTE(review): per the CBS API the content then stays
+     * owned by the caller -- confirm for the FFmpeg version in use. */
+    ret = ff_cbs_insert_unit_content(au, -1,
+                                     type, obu_unit, NULL);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: "
+               "type = %d.\n", type);
+        return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * Serialise a CBS fragment and copy the result into a caller buffer.
+ *
+ * @param avctx    codec context; its private data supplies the CBS context
+ * @param data     destination buffer; the size check assumes it can hold
+ *                 MAX_PARAM_BUFFER_SIZE bytes
+ * @param data_len receives the length of the written data in bits
+ * @param bs       fragment to write
+ * @return 0 on success, the error from ff_cbs_write_fragment_data(), or
+ *         AVERROR(ENOSPC) if the data does not fit
+ */
+static int vaapi_encode_av1_write_obu(AVCodecContext *avctx,
+                                      char *data, size_t *data_len,
+                                      CodedBitstreamFragment *bs)
+{
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+    int ret;
+
+    ret = ff_cbs_write_fragment_data(priv->cbc, bs);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
+        return ret;
+    }
+
+    /* Compare in bits: whole bytes written minus the padding bits. */
+    if ((size_t)8 * MAX_PARAM_BUFFER_SIZE < 8 * bs->data_size - bs->data_bit_padding) {
+        av_log(avctx, AV_LOG_ERROR, "Access unit too large: "
+               "%zu < %zu.\n", (size_t)8 * MAX_PARAM_BUFFER_SIZE,
+               8 * bs->data_size - bs->data_bit_padding);
+        return AVERROR(ENOSPC);
+    }
+
+    memcpy(data, bs->data, bs->data_size);
+    *data_len = 8 * bs->data_size - bs->data_bit_padding;
+
+    return 0;
+}
+
+/**
+ * Signed distance between two order hints, with wrap-around.
+ *
+ * The low (order_hint_bits_minus_1 + 1) bits of a - b are interpreted as a
+ * two's-complement number.
+ *
+ * @return the signed distance, or 0 when seq->enable_order_hint is not set
+ */
+static int get_relative_dist(const AV1RawSequenceHeader *seq,
+                             unsigned int a, unsigned int b)
+{
+    unsigned int diff, m;
+    if (!seq->enable_order_hint)
+        return 0;
+    diff = a - b;
+    /* m is the sign bit of the order-hint range. */
+    m = 1 << seq->order_hint_bits_minus_1;
+    /* Keep the bits below m and subtract the sign bit: sign extension. */
+    diff = (diff & (m - 1)) - (diff & m);
+    return diff;
+}
+
+/**
+ * Write value with a variable-length code whose width depends on n.
+ *
+ * With w = av_log2(n) + 1 and m = (1 << w) - n, values below m take w - 1
+ * bits; all other values take w bits (w - 1 bits plus one extra bit).
+ * NOTE(review): this looks like the ns(n) descriptor of the AV1 bitstream
+ * syntax, which would require n > 0 and value < n -- confirm at the callers.
+ *
+ * @return always 0
+ */
+static int write_ns(PutBitContext *pbc, uint32_t n, uint32_t value)
+{
+    uint32_t w, m, v, extra_bit;
+
+    w = av_log2(n) + 1;
+    m = (1 << w) - n;
+
+    if (value < m) {
+        /* Short form: the value itself in w - 1 bits. */
+        v = value;
+        put_bits(pbc, w - 1, v);
+    } else {
+        /* Long form: w - 1 bits followed by the low bit of (value - m). */
+        v = m + ((value - m) >> 1);
+        extra_bit = (value - m) & 1;
+        put_bits(pbc, w - 1, v);
+        put_bits(pbc, 1, extra_bit);
+    }
+
+    return 0;
+}
+
+/**
+ * This API provides the minimum implementation for the features currently
+ * enabled in the frame header. If more features are enabled in the future,
+ * make sure the flags related to those features are packed correctly into the
+ * frame header OBU by this API.
+ */
+static int vaapi_encode_av1_write_frame_header(AVCodecContext *avctx,
+                                               VAAPIEncodePicture *pic,
+                                               char *data, size_t *data_len)
+{
+    VAAPIEncodeContext              *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
+    AV1RawOBU                    *fh_obu = &priv->fh;
+    AV1RawOBU                    *sh_obu = &priv->sh;
+    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
+    AV1RawSequenceHeader             *sh = &sh_obu->obu.sequence_header;
+    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
+    PutBitContext pbc, pbc_tmp;
+    uint8_t byte;
+    int qindex, coded_lossless;
+    int id_len, frame_is_intra, skip_mode_allowed;
+    int start, payload_bits, obu_size, obu_size_len;
+    int qindex_offset, loopfilter_offset;
+    int cdef_start_offset, cdef_end_offset;
+    int i;
+
+    init_put_bits(&pbc, data, MAX_PARAM_BUFFER_SIZE);
+
+    /** obu header */
+    put_bits(&pbc, 1, fh_obu->header.obu_forbidden_bit);
+    put_bits(&pbc, 4, fh_obu->header.obu_type);
+    put_bits(&pbc, 1, fh_obu->header.obu_extension_flag);
+    put_bits(&pbc, 1, fh_obu->header.obu_has_size_field);
+    put_bits(&pbc, 1, fh_obu->header.obu_reserved_1bit);
+
+    /** record pbc status to re-write obu size later. */
+    if (fh_obu->header.obu_has_size_field) {
+        pbc_tmp = pbc;
+        put_bits32(&pbc, 0);
+        put_bits32(&pbc, 0);
+    }
+
+    start = put_bits_count(&pbc);
+
+    /** uncompressed_header() */
+    if (sh->frame_id_numbers_present_flag)
+        id_len = sh->additional_frame_id_length_minus_1 +
+                 sh->delta_frame_id_length_minus_2 + 3;
+
+    frame_is_intra = (fh->frame_type == AV1_FRAME_KEY ||
+                      fh->frame_type == AV1_FRAME_INTRA_ONLY);
+    if (!sh->reduced_still_picture_header) {
+        put_bits(&pbc, 1, fh->show_existing_frame);
+        if (fh->show_existing_frame) {
+            put_bits(&pbc, 3, fh->frame_to_show_map_idx);
+            goto trailing_bits;
+        }
+        if (sh->frame_id_numbers_present_flag)
+            put_bits(&pbc, id_len, fh->display_frame_id);
+
+        put_bits(&pbc, 2, fh->frame_type);
+        put_bits(&pbc, 1, fh->show_frame);
+        if (!fh->show_frame)
+            put_bits(&pbc, 1, fh->showable_frame);
+
+        if (!(fh->frame_type == AV1_FRAME_SWITCH ||
+            (fh->frame_type == AV1_FRAME_KEY && fh->show_frame)))
+            put_bits(&pbc, 1, fh->error_resilient_mode);
+    }
+
+    put_bits(&pbc, 1, fh->disable_cdf_update);
+    if (sh->seq_force_screen_content_tools == AV1_SELECT_SCREEN_CONTENT_TOOLS)
+        put_bits(&pbc, 1, fh->allow_screen_content_tools);
+
+    if (fh->allow_screen_content_tools && sh->seq_force_integer_mv == AV1_SELECT_INTEGER_MV)
+        put_bits(&pbc, 1, fh->force_integer_mv);
+
+    if (sh->frame_id_numbers_present_flag)
+        put_bits(&pbc, id_len, fh->current_frame_id);
+
+    if (fh->frame_type != AV1_FRAME_SWITCH && !sh->reduced_still_picture_header)
+        put_bits(&pbc, 1, fh->frame_size_override_flag);
+
+    if (sh->enable_order_hint)
+        put_bits(&pbc, sh->order_hint_bits_minus_1 + 1, fh->order_hint);
+
+    if (!(frame_is_intra || fh->error_resilient_mode))
+        put_bits(&pbc, 3, fh->primary_ref_frame);
+
+    if (!(fh->frame_type == AV1_FRAME_SWITCH ||
+        fh->frame_type == AV1_FRAME_KEY && fh->show_frame))
+        put_bits(&pbc, 8, fh->refresh_frame_flags);
+
+    if (frame_is_intra) {
+        /** render_size() */
+         put_bits(&pbc, 1, fh->render_and_frame_size_different);
+    } else {
+        if (!frame_is_intra && sh->enable_order_hint)
+            put_bits(&pbc, 1, fh->frame_refs_short_signaling);
+
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            if (!fh->frame_refs_short_signaling)
+                put_bits(&pbc, 3, fh->ref_frame_idx[i]);
+        }
+
+        if (!(fh->frame_size_override_flag && !fh->error_resilient_mode))
+            put_bits(&pbc, 1, fh->render_and_frame_size_different);
+
+        if (!fh->force_integer_mv)
+            put_bits(&pbc, 1, fh->allow_high_precision_mv);
+
+        /** read_interpolation_filter() */
+        put_bits(&pbc, 1, fh->is_filter_switchable);
+        if (!fh->is_filter_switchable)
+            put_bits(&pbc, 2, fh->interpolation_filter);
+
+        put_bits(&pbc, 1, fh->is_motion_mode_switchable);
+    }
+
+    if (!(sh->reduced_still_picture_header || fh->disable_cdf_update))
+        put_bits(&pbc, 1, fh->disable_frame_end_update_cdf);
+
+    /** tile_info() */
+    put_bits(&pbc, 1, fh->uniform_tile_spacing_flag);
+    if (fh->uniform_tile_spacing_flag) {
+        for (i = 0; i < priv->tile_cols_log2 - priv->min_log2_tile_cols; i++) {
+            put_bits(&pbc, 1, 1);
+        }
+        if (priv->tile_cols_log2 < priv->max_log2_tile_cols)
+            put_bits(&pbc, 1, 0);
+
+        for (i = 0; i < priv->tile_rows_log2 - priv->min_log2_tile_rows; i++) {
+            put_bits(&pbc, 1, 1);
+        }
+        if (priv->tile_rows_log2 < priv->max_log2_tile_rows)
+            put_bits(&pbc, 1, 0);
+    } else {
+        int start_sb = 0;
+        int max_width, max_height;
+        for (i = 0; start_sb < priv->sb_cols; i++) {
+            max_width = FFMIN(priv->sb_cols - start_sb, priv->max_tile_width_sb);
+            write_ns(&pbc, max_width, fh->width_in_sbs_minus_1[i]);
+            start_sb += fh->width_in_sbs_minus_1[i] + 1;
+        }
+
+        start_sb = 0;
+        for (i = 0; start_sb < priv->sb_rows; i++) {
+            max_height = FFMIN(priv->sb_rows - start_sb, priv->max_tile_height_sb);
+            write_ns(&pbc, max_height, fh->height_in_sbs_minus_1[i]);
+            start_sb += fh->height_in_sbs_minus_1[i] + 1;
+        }
+    }
+
+    if (priv->tile_cols_log2 || priv->tile_rows_log2) {
+        put_bits(&pbc, priv->tile_cols_log2 + priv->tile_rows_log2, fh->context_update_tile_id);
+        put_bits(&pbc, 2, fh->tile_size_bytes_minus1);
+    }
+
+    qindex_offset = put_bits_count(&pbc);
+    /** quantization_params() */
+    put_bits(&pbc, 8, fh->base_q_idx);
+    put_bits(&pbc, 1, fh->delta_q_y_dc);
+    put_bits(&pbc, 1, fh->delta_q_u_dc);
+    put_bits(&pbc, 1, fh->delta_q_u_ac);
+    put_bits(&pbc, 1, fh->using_qmatrix);
+
+    /** segmentation_params() */
+    put_bits(&pbc, 1, fh->segmentation_enabled);
+
+    /** delta_q_params() */
+    if (fh->base_q_idx)
+        put_bits(&pbc, 1, fh->delta_q_present);
+    if (fh->delta_q_present)
+        put_bits(&pbc, 2, fh->delta_q_res);
+
+    /** delta_lf_params() */
+    if (fh->delta_q_present) {
+        if (!fh->allow_intrabc)
+            put_bits(&pbc, 1, fh->delta_lf_present);
+        if (fh->delta_lf_present) {
+            put_bits(&pbc, 2, fh->delta_lf_res);
+            put_bits(&pbc, 1, fh->delta_lf_multi);
+        }
+    }
+
+    /** coded_lossless */
+    coded_lossless = 1;
+    for (i = 0; i < AV1_MAX_SEGMENTS; i++) {
+        if (fh->segmentation_enabled && fh->feature_enabled[i][AV1_SEG_LVL_ALT_Q])
+            qindex = fh->base_q_idx + fh->feature_value[i][AV1_SEG_LVL_ALT_Q];
+        else
+            qindex = fh->base_q_idx;
+        qindex = av_clip_uintp2(qindex, 8);
+
+        if (qindex || fh->delta_q_y_dc || fh->delta_q_u_ac || fh->delta_q_u_dc ||
+            fh->delta_q_v_ac || fh->delta_q_v_dc)
+            coded_lossless = 0;
+    }
+
+    loopfilter_offset = put_bits_count(&pbc);
+    /** loop_filter_params() */
+    if (!(coded_lossless || fh->allow_intrabc)) {
+        put_bits(&pbc, 6, fh->loop_filter_level[0]);
+        put_bits(&pbc, 6, fh->loop_filter_level[1]);
+        if (fh->loop_filter_level[0] || fh->loop_filter_level[1]) {
+            put_bits(&pbc, 6, fh->loop_filter_level[2]);
+            put_bits(&pbc, 6, fh->loop_filter_level[3]);
+        }
+        put_bits(&pbc, 3, fh->loop_filter_sharpness);
+        put_bits(&pbc, 1, fh->loop_filter_delta_enabled);
+    }
+
+    cdef_start_offset = put_bits_count(&pbc);
+    /** cdef_params() */
+    if (!(coded_lossless || fh->allow_intrabc || !sh->enable_cdef)) {
+        put_bits(&pbc, 2, fh->cdef_damping_minus_3);
+        put_bits(&pbc, 2, fh->cdef_bits);
+        for (i = 0; i < (1 << fh->cdef_bits); i++) {
+            put_bits(&pbc, 4, fh->cdef_y_pri_strength[i]);
+            put_bits(&pbc, 2, fh->cdef_y_sec_strength[i]);
+            put_bits(&pbc, 4, fh->cdef_uv_pri_strength[i]);
+            put_bits(&pbc, 2, fh->cdef_uv_sec_strength[i]);
+        }
+    }
+    cdef_end_offset = put_bits_count(&pbc);
+
+    /** read_tx_mode() */
+    if (fh->tx_mode == AV1_TX_MODE_SELECT)
+        put_bits(&pbc, 1, 1);
+    else
+        put_bits(&pbc, 1, 0);
+
+    /** frame_reference_mode() */
+    if (!frame_is_intra)
+        put_bits(&pbc, 1, fh->reference_select);
+
+    /** skip_mode_params() */
+    if (frame_is_intra || !fh->reference_select || !sh->enable_order_hint)
+        skip_mode_allowed = 0;
+    else {
+        int forward_idx, backward_idx;
+        int ref_hint, forward_hint, backward_hint;
+
+        forward_idx = -1;
+        backward_idx = -1;
+
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
+            if (get_relative_dist(sh, ref_hint, fh->order_hint) < 0) {
+                if (forward_idx < 0 || get_relative_dist(sh, ref_hint, forward_idx) > 0) {
+                    forward_idx = i;
+                    forward_hint = ref_hint;
+                }
+            } else if (get_relative_dist(sh, ref_hint, fh->order_hint) > 0) {
+                if (backward_idx < 0 || get_relative_dist(sh, ref_hint, backward_hint) < 0) {
+                    backward_idx = i;
+                    backward_hint = ref_hint;
+                }
+            }
+        }
+        if (forward_idx < 0)
+            skip_mode_allowed = 0;
+        else if (backward_idx >= 0)
+            skip_mode_allowed = 1;
+        else {
+            int second_forward_idx, second_forward_hint;
+            second_forward_idx = -1;
+            for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+                ref_hint = fh->ref_order_hint[fh->ref_frame_idx[i]];
+                if (get_relative_dist(sh, ref_hint, forward_hint) < 0) {
+                    if (second_forward_idx < 0 || get_relative_dist(sh, ref_hint, second_forward_hint) > 0){
+                        second_forward_idx = i;
+                        second_forward_hint = ref_hint;
+                    }
+                }
+            }
+            if (second_forward_idx < 0)
+                skip_mode_allowed = 0;
+            else
+                skip_mode_allowed = 1;
+        }
+    }
+
+    if (skip_mode_allowed)
+        put_bits(&pbc, 1, fh->skip_mode_present);
+
+    put_bits(&pbc, 1, fh->reduced_tx_set);
+
+    /** global_motion_params() */
+    if (!frame_is_intra) {
+        for (i = AV1_REF_FRAME_LAST; i <= AV1_REF_FRAME_ALTREF; i++) {
+            put_bits(&pbc, 1, fh->is_global[i]);
+            if (fh->is_global[i]) {
+                put_bits(&pbc, 1, fh->is_rot_zoom[i]);
+                if (!fh->is_rot_zoom[i])
+                    put_bits(&pbc, 1, fh->is_translation[i]);
+            }
+        }
+    }
+
+trailing_bits:
+    payload_bits = put_bits_count(&pbc) - start;
+
+    /** trailing_bits() */
+    put_bits(&pbc, 1, 1);
+    obu_size = (put_bits_count(&pbc) - start + 7) / 8;
+    for (i = 0; i < obu_size * 8 - payload_bits - 1; i++)
+        put_bits(&pbc, 1, 0);
+
+    flush_put_bits(&pbc);
+    *data_len = put_bits_count(&pbc);
+
+    /** update obu size in bitstream */
+    if (fh_obu->header.obu_has_size_field) {
+        obu_size_len = priv->attr_ext2.bits.obu_size_bytes_minus1 + 1;
+        for (i = 0; i < obu_size_len; i++) {
+            byte = obu_size >> (7 * i) & 0x7f;
+            if (i < obu_size_len - 1)
+                byte |= 0x80;
+            put_bits(&pbc_tmp, 8, byte);
+        }
+        flush_put_bits(&pbc_tmp);
+        memmove(pbc_tmp.buf_ptr, pbc_tmp.buf_ptr + (8 - obu_size_len), obu_size);
+        *data_len -= (8 - obu_size_len) * 8;
+    }
+
+    if (fh->show_existing_frame)
+        return 0;
+
+    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
+        vpic->min_base_qindex = av_clip(avctx->qmin, 1, AV1_MAX_QUANT);
+        vpic->max_base_qindex = av_clip(avctx->qmax, 1, AV1_MAX_QUANT);
+
+        vpic->bit_offset_qindex            = qindex_offset - (8 - obu_size_len) * 8;
+        vpic->bit_offset_loopfilter_params = loopfilter_offset - (8 - obu_size_len) * 8;
+        vpic->bit_offset_cdef_params       = cdef_start_offset - (8 - obu_size_len) * 8;
+        vpic->size_in_bits_cdef_params     = cdef_end_offset - cdef_start_offset;
+        vpic->size_in_bits_frame_hdr_obu   = *data_len;
+
+        vpic->byte_offset_frame_hdr_obu_size = (((pic->type == PICTURE_TYPE_IDR) ?
+                                               priv->sh_data_len / 8 : 0) +
+                                               (fh_obu->header.obu_extension_flag ?
+                                               2 : 1));
+    }
+
+    return 0;
+}
+
+/**
+ * Return the smallest k >= 0 for which (blkSize << k) >= target.
+ *
+ * @param blkSize base size that is doubled on every step
+ * @param target  value the shifted size has to reach
+ * @return number of doublings required (0 if blkSize already reaches target)
+ */
+static int tile_log2(int blkSize, int target) {
+    int shift = 0;
+
+    while ((blkSize << shift) < target)
+        shift++;
+
+    return shift;
+}
+
+/**
+ * Validate the user-requested tile layout and derive the tile geometry.
+ *
+ * Starting from priv->tile_cols / priv->tile_rows / priv->tile_groups this
+ * fills in the superblock dimensions of the frame, the log2 tile limits,
+ * the per-tile width/height tables (width_in_sbs_minus_1[],
+ * height_in_sbs_minus_1[]) and the uniform_tile flag. Requested values that
+ * do not fit are adjusted and a warning is logged.
+ *
+ * @param avctx codec context; priv_data is a VAAPIEncodeAV1Context
+ * @return 0 on success, AVERROR(EINVAL) if the tile count exceeds the AV1
+ *         limits or the limit reported by the driver
+ */
+static int vaapi_encode_av1_set_tile(AVCodecContext *avctx)
+{
+    VAAPIEncodeAV1Context *priv = avctx->priv_data;
+    int mi_cols, mi_rows, sb_shift, sb_size;
+    int max_tile_area_sb, max_tile_area_sb_varied;
+    int tile_width_sb, tile_height_sb, widest_tile_sb;
+    int min_log2_tiles;
+    int tile_rows_tmp, i;
+
+    /** reject tile counts above the codec-level maximum. */
+    if (priv->tile_cols > AV1_MAX_TILE_COLS ||
+        priv->tile_rows > AV1_MAX_TILE_ROWS) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid tile number %dx%d, should less than %dx%d.\n",
+               priv->tile_cols, priv->tile_rows, AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
+        return AVERROR(EINVAL);
+    }
+
+    /**
+     * Frame size in 4-pixel units (dimensions rounded up to a multiple of 8
+     * pixels), then in superblocks: 32 units per 128x128 superblock,
+     * 16 units otherwise.
+     */
+    mi_cols = 2 * ((avctx->width + 7) >> 3);
+    mi_rows = 2 * ((avctx->height + 7) >> 3);
+    priv->sb_cols = priv->use_128x128_superblock ?
+                    ((mi_cols + 31) >> 5) : ((mi_cols + 15) >> 4);
+    priv->sb_rows = priv->use_128x128_superblock ?
+                    ((mi_rows + 31) >> 5) : ((mi_rows + 15) >> 4);
+    sb_shift = priv->use_128x128_superblock ? 5 : 4;
+    /** sb_size is used as a shift to convert the pixel limits below into superblocks. */
+    sb_size  = sb_shift + 2;
+    priv->max_tile_width_sb = AV1_MAX_TILE_WIDTH >> sb_size;
+    max_tile_area_sb = AV1_MAX_TILE_AREA  >> (2 * sb_size);
+
+    priv->min_log2_tile_cols = tile_log2(priv->max_tile_width_sb, priv->sb_cols);
+    priv->max_log2_tile_cols = tile_log2(1, FFMIN(priv->sb_cols, AV1_MAX_TILE_COLS));
+    priv->max_log2_tile_rows = tile_log2(1, FFMIN(priv->sb_rows, AV1_MAX_TILE_ROWS));
+    min_log2_tiles = FFMAX(priv->min_log2_tile_cols,
+                           tile_log2(max_tile_area_sb, priv->sb_rows * priv->sb_cols));
+
+    /**
+     * Clamp the column count to [ceil(sb_cols / max_tile_width_sb), sb_cols]
+     * and warn if the requested value had to be changed.
+     */
+    if (priv->tile_cols != av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols)) {
+        priv->tile_cols = av_clip(priv->tile_cols, (priv->sb_cols + priv->max_tile_width_sb - 1) / priv->max_tile_width_sb, priv->sb_cols);
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile cols, correct to %d.\n", priv->tile_cols);
+    }
+
+    /** tile width (in superblocks) a uniform layout with 2^tile_cols_log2 columns would use. */
+    priv->tile_cols_log2 = tile_log2(1, priv->tile_cols);
+    tile_width_sb = (priv->sb_cols + (1 << priv->tile_cols_log2) - 1) >>
+                    priv->tile_cols_log2;
+
+    /** there cannot be more tile rows than superblock rows. */
+    if (priv->tile_rows > priv->sb_rows) {
+        priv->tile_rows = priv->sb_rows;
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
+    }
+
+    /**
+     * Search upwards from the requested row count for the first count that
+     * yields an acceptable layout; each candidate is tried as a uniform
+     * layout first and as a non-uniform layout second.
+     *
+     * NOTE(review): if no candidate is accepted the loop ends without a
+     * break, leaving tile_rows_tmp one past the last candidate; that value
+     * is then stored in priv->tile_rows below.
+     */
+    for (tile_rows_tmp = priv->tile_rows; tile_rows_tmp <= priv->sb_rows && tile_rows_tmp <= AV1_MAX_TILE_ROWS; tile_rows_tmp++) {
+        /** try uniform tiles. */
+        priv->tile_rows_log2 = tile_log2(1, tile_rows_tmp);
+        /** uniform columns are only usable if they give exactly tile_cols columns. */
+        if ((priv->sb_cols + tile_width_sb - 1) / tile_width_sb == priv->tile_cols) {
+            /** all columns share tile_width_sb; the last one takes the remainder. */
+            for (i = 0; i < priv->tile_cols - 1; i++)
+                priv->width_in_sbs_minus_1[i] = tile_width_sb - 1;
+            priv->width_in_sbs_minus_1[i] = priv->sb_cols - (priv->tile_cols - 1) * tile_width_sb - 1;
+
+            tile_height_sb = (priv->sb_rows + (1 << priv->tile_rows_log2) - 1) >>
+                             priv->tile_rows_log2;
+
+            /** accept if the row count matches and the tile area stays within the limit. */
+            if ((priv->sb_rows + tile_height_sb - 1) / tile_height_sb == tile_rows_tmp &&
+                tile_height_sb <= max_tile_area_sb / tile_width_sb) {
+                for (i = 0; i < tile_rows_tmp - 1; i++)
+                    priv->height_in_sbs_minus_1[i] = tile_height_sb - 1;
+                priv->height_in_sbs_minus_1[i] = priv->sb_rows - (tile_rows_tmp - 1) * tile_height_sb - 1;
+
+                priv->uniform_tile = 1;
+                priv->min_log2_tile_rows = FFMAX(min_log2_tiles - priv->tile_cols_log2, 0);
+
+                break;
+            }
+        }
+
+        /** try non-uniform tiles: spread the superblock columns as evenly as possible. */
+        widest_tile_sb = 0;
+        for (i = 0; i < priv->tile_cols; i++) {
+            priv->width_in_sbs_minus_1[i] = (i + 1) * priv->sb_cols / priv->tile_cols - i * priv->sb_cols / priv->tile_cols - 1;
+            widest_tile_sb = FFMAX(widest_tile_sb, priv->width_in_sbs_minus_1[i] + 1);
+        }
+
+        /** the widest column bounds how tall a tile may be. */
+        if (min_log2_tiles)
+            max_tile_area_sb_varied = (priv->sb_rows * priv->sb_cols) >> (min_log2_tiles + 1);
+        else
+            max_tile_area_sb_varied = priv->sb_rows * priv->sb_cols;
+        priv->max_tile_height_sb = FFMAX(1, max_tile_area_sb_varied / widest_tile_sb);
+
+        /** accept if the candidate lies in [ceil(sb_rows / max_tile_height_sb), sb_rows]. */
+        if (tile_rows_tmp == av_clip(tile_rows_tmp, (priv->sb_rows + priv->max_tile_height_sb - 1) / priv->max_tile_height_sb, priv->sb_rows)) {
+            for (i = 0; i < tile_rows_tmp; i++)
+                priv->height_in_sbs_minus_1[i] = (i + 1) * priv->sb_rows / tile_rows_tmp - i * priv->sb_rows / tile_rows_tmp - 1;
+
+            break;
+        }
+    }
+
+    /** report when the search settled on a different row count than requested. */
+    if (priv->tile_rows != tile_rows_tmp) {
+        priv->tile_rows = tile_rows_tmp;
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile rows, correct to %d.\n", priv->tile_rows);
+    }
+
+    /**
+     * check if tile cols/rows is supported by driver; a zero
+     * max_tile_num_minus1 skips the check.
+     */
+    if (priv->attr_ext2.bits.max_tile_num_minus1) {
+        if ((priv->tile_cols * priv->tile_rows - 1) > priv->attr_ext2.bits.max_tile_num_minus1) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported tile num %d * %d = %d by driver, "
+                   "should be less than %d.\n", priv->tile_cols, priv->tile_rows,
+                   priv->tile_cols * priv->tile_rows,
+                   priv->attr_ext2.bits.max_tile_num_minus1 + 1);
+            return AVERROR(EINVAL);
+        }
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Setting tile cols/rows to %d/%d.\n",
+           priv->tile_cols, priv->tile_rows);
+
+    /** a tile group needs at least one tile, so cap the group count at the tile count. */
+    if (priv->tile_groups > priv->tile_cols * priv->tile_rows) {
+        av_log(avctx, AV_LOG_WARNING, "Invalid tile groups number %d, "
+        "correct to %d.\n", priv->tile_groups, priv->tile_cols * priv->tile_rows);
+        priv->tile_groups = priv->tile_cols * priv->tile_rows;
+    }
+
+    return 0;
+}
+
+/**
+ * Hand the already packed sequence header to the caller.
+ *
+ * The header bytes were produced earlier and cached in priv->sh_data; this
+ * callback only copies the whole cache buffer out and reports its length.
+ *
+ * @param avctx    codec context; priv_data is a VAAPIEncodeAV1Context
+ * @param data     destination buffer, must hold MAX_PARAM_BUFFER_SIZE bytes
+ * @param data_len receives priv->sh_data_len (presumably a bit count, since
+ *                 the frame-header code divides it by 8 to get bytes)
+ * @return always 0
+ */
+static int vaapi_encode_av1_write_sequence_header(AVCodecContext *avctx,
+                                                  char *data, size_t *data_len)
+{
+    VAAPIEncodeAV1Context *av1 = avctx->priv_data;
+    const size_t copy_size = MAX_PARAM_BUFFER_SIZE * sizeof(char);
+
+    *data_len = av1->sh_data_len;
+    memcpy(data, &av1->sh_data, copy_size);
+
+    return 0;
+}
+
+/**
+ * Build the AV1 sequence header and the matching VAAPI sequence parameters.
+ *
+ * Fills priv->sh from the codec context (profile, frame size, colour
+ * description, tier, level), mirrors the relevant fields into the
+ * VAEncSequenceParameterBufferAV1 held by the common layer, and packs the
+ * sequence header OBU into priv->sh_data / priv->sh_data_len.
+ *
+ * When no level was requested it is guessed from bitrate, tier, surface
+ * size, tile layout and frame rate; if no level fits, level index 19 with
+ * tier 1 is used.
+ *
+ * @param avctx codec context
+ * @return 0 on success, a negative error code from adding or writing the OBU
+ */
+static int vaapi_encode_av1_init_sequence_params(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext               *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context           *priv = avctx->priv_data;
+    AV1RawOBU                     *sh_obu = &priv->sh;
+    AV1RawSequenceHeader              *sh = &sh_obu->obu.sequence_header;
+    VAEncSequenceParameterBufferAV1 *vseq = ctx->codec_sequence_params;
+    CodedBitstreamFragment           *obu = &priv->current_obu;
+    const AVPixFmtDescriptor *desc;
+    int ret;
+
+    memset(sh_obu, 0, sizeof(*sh_obu));
+    sh_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER;
+
+    desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
+    av_assert0(desc);
+
+    sh->seq_profile  = avctx->profile;
+    if (!sh->seq_force_screen_content_tools)
+        sh->seq_force_integer_mv = AV1_SELECT_INTEGER_MV;
+    sh->frame_width_bits_minus_1  = av_log2(avctx->width);
+    sh->frame_height_bits_minus_1 = av_log2(avctx->height);
+    sh->max_frame_width_minus_1   = avctx->width - 1;
+    sh->max_frame_height_minus_1  = avctx->height - 1;
+    sh->seq_tier[0]               = priv->tier;
+    /** enable order hint and reserve maximum 8 bits for it by default. */
+    sh->enable_order_hint         = 1;
+    sh->order_hint_bits_minus_1   = 7;
+
+    sh->color_config = (AV1RawColorConfig) {
+        .high_bitdepth                  = desc->comp[0].depth == 8 ? 0 : 1,
+        .color_primaries                = avctx->color_primaries,
+        .transfer_characteristics       = avctx->color_trc,
+        .matrix_coefficients            = avctx->colorspace,
+        .color_description_present_flag = (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+                                           avctx->color_trc       != AVCOL_TRC_UNSPECIFIED ||
+                                           avctx->colorspace      != AVCOL_SPC_UNSPECIFIED),
+        .subsampling_x                  = desc->log2_chroma_w,
+        .subsampling_y                  = desc->log2_chroma_h,
+    };
+
+    if (avctx->level != FF_LEVEL_UNKNOWN) {
+        sh->seq_level_idx[0] = avctx->level;
+    } else {
+        const AV1LevelDescriptor *level;
+        float framerate;
+
+        /**
+         * Divide in floating point: an integer division would truncate
+         * fractional rates such as 30000/1001 before the level is guessed.
+         */
+        if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
+            framerate = (float)avctx->framerate.num / avctx->framerate.den;
+        else
+            framerate = 0;
+
+        level = ff_av1_guess_level(avctx->bit_rate, priv->tier,
+                                   ctx->surface_width, ctx->surface_height,
+                                   priv->tile_rows * priv->tile_cols,
+                                   priv->tile_cols, framerate);
+        if (level) {
+            av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
+            sh->seq_level_idx[0] = level->level_idx;
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to "
+                   "any normal level, using level 6.3 by default.\n");
+            sh->seq_level_idx[0] = 19;
+            sh->seq_tier[0] = 1;
+        }
+    }
+
+    /** mirror the bitstream-level choices into the VAAPI sequence buffer. */
+    vseq->seq_profile             = sh->seq_profile;
+    vseq->seq_level_idx           = sh->seq_level_idx[0];
+    vseq->seq_tier                = sh->seq_tier[0];
+    vseq->order_hint_bits_minus_1 = sh->order_hint_bits_minus_1;
+    vseq->intra_period            = ctx->gop_size;
+    vseq->ip_period               = ctx->b_per_p + 1;
+
+    vseq->seq_fields.bits.enable_order_hint = sh->enable_order_hint;
+
+    /** bitrate and CDEF are only set when the rate control mode is not CQP. */
+    if (!(ctx->va_rc_mode & VA_RC_CQP)) {
+        vseq->bits_per_second = ctx->va_bit_rate;
+        vseq->seq_fields.bits.enable_cdef = sh->enable_cdef = 1;
+    }
+
+    ret = vaapi_encode_av1_add_obu(avctx, obu, AV1_OBU_SEQUENCE_HEADER, &priv->sh);
+    if (ret < 0)
+        goto end;
+
+    ret = vaapi_encode_av1_write_obu(avctx, priv->sh_data, &priv->sh_data_len, obu);
+
+end:
+    /** the fragment is scratch space; always leave it empty for the next user. */
+    ff_cbs_fragment_reset(obu);
+    return ret;
+}
+
+/**
+ * Build the AV1 frame header and the VAAPI picture parameters for one picture.
+ *
+ * Resets priv->fh, fills it according to the picture type (IDR, P or B),
+ * copies the tile layout prepared at init time, mirrors the result into the
+ * VAEncPictureParameterBufferAV1 of the picture, registers the reconstructed
+ * surfaces of all references, and finally packs the frame header into
+ * priv->fh_data / priv->fh_data_len.
+ *
+ * @param avctx codec context
+ * @param pic   picture being set up; pic->priv_data is a VAAPIEncodeAV1Picture
+ * @return 0 on success, AVERROR(EINVAL) if the driver reports neither of the
+ *         tx modes this encoder can use, or a negative error code from
+ *         packing the frame header
+ */
+static int vaapi_encode_av1_init_picture_params(AVCodecContext *avctx,
+                                                VAAPIEncodePicture *pic)
+{
+    VAAPIEncodeAV1Context          *priv = avctx->priv_data;
+    VAAPIEncodeAV1Picture          *hpic = pic->priv_data;
+    AV1RawOBU                    *fh_obu = &priv->fh;
+    AV1RawFrameHeader                *fh = &fh_obu->obu.frame.header;
+    VAEncPictureParameterBufferAV1 *vpic = pic->codec_picture_params;
+    CodedBitstreamFragment          *obu = &priv->current_obu;
+    VAAPIEncodePicture    *ref;
+    VAAPIEncodeAV1Picture *href;
+    int slot, i;
+    int ret;
+    static const int8_t default_loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME] =
+        { 1, 0, 0, 0, -1, 0, -1, -1 };
+
+    memset(fh_obu, 0, sizeof(*fh_obu));
+    pic->nb_slices = priv->tile_groups;
+    fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
+    fh_obu->header.obu_has_size_field = 1;
+
+    switch (pic->type) {
+    case PICTURE_TYPE_IDR:
+        /** a key frame must not carry references in either list. */
+        av_assert0(pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0);
+        fh->frame_type = AV1_FRAME_KEY;
+        fh->refresh_frame_flags = 0xFF;
+        fh->base_q_idx = priv->q_idx_idr;
+        hpic->slot = 0;
+        hpic->last_idr_frame = pic->display_order;
+        break;
+    case PICTURE_TYPE_P:
+        av_assert0(pic->nb_refs[0]);
+        fh->frame_type = AV1_FRAME_INTER;
+        fh->base_q_idx = priv->q_idx_p;
+        ref = pic->refs[0][pic->nb_refs[0] - 1];
+        href = ref->priv_data;
+        /** store into the slot (0 or 1) not used by the nearest reference. */
+        hpic->slot = !href->slot;
+        hpic->last_idr_frame = href->last_idr_frame;
+        fh->refresh_frame_flags = 1 << hpic->slot;
+
+        /** set the nearest frame in L0 as all reference frame. */
+        for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+            fh->ref_frame_idx[i] = href->slot;
+        }
+        fh->primary_ref_frame = href->slot;
+        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
+
+        /** set the 2nd nearest frame in L0 as Golden frame. */
+        if (pic->nb_refs[0] > 1) {
+            ref = pic->refs[0][pic->nb_refs[0] - 2];
+            href = ref->priv_data;
+            fh->ref_frame_idx[3] = href->slot;
+            fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+            vpic->ref_frame_ctrl_l0.fields.search_idx1 = AV1_REF_FRAME_GOLDEN;
+        }
+        break;
+    case PICTURE_TYPE_B:
+        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
+        fh->frame_type = AV1_FRAME_INTER;
+        fh->base_q_idx = priv->q_idx_b;
+        fh->refresh_frame_flags = 0x0;
+        fh->reference_select = 1;
+
+        /** B frame will not be referenced, disable its recon frame. */
+        vpic->picture_flags.bits.disable_frame_recon = 1;
+
+        /** Use LAST_FRAME and BWDREF_FRAME for reference. */
+        vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST;
+        vpic->ref_frame_ctrl_l1.fields.search_idx0 = AV1_REF_FRAME_BWDREF;
+
+        /** the nearest L0 frame fills the first group of reference indices. */
+        ref                            = pic->refs[0][pic->nb_refs[0] - 1];
+        href                           = ref->priv_data;
+        hpic->last_idr_frame           = href->last_idr_frame;
+        fh->primary_ref_frame          = href->slot;
+        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+        for (i = 0; i < AV1_REF_FRAME_GOLDEN; i++) {
+            fh->ref_frame_idx[i] = href->slot;
+        }
+
+        /** the nearest L1 frame fills the remaining reference indices. */
+        ref                            = pic->refs[1][pic->nb_refs[1] - 1];
+        href                           = ref->priv_data;
+        fh->ref_order_hint[href->slot] = ref->display_order - href->last_idr_frame;
+        for (i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME; i++) {
+            fh->ref_frame_idx[i] = href->slot;
+        }
+        break;
+    default:
+        av_assert0(0 && "invalid picture type");
+    }
+
+    /** a picture is shown right away unless it is displayed after its encode position. */
+    fh->show_frame                = pic->display_order <= pic->encode_order;
+    fh->showable_frame            = fh->frame_type != AV1_FRAME_KEY;
+    fh->frame_width_minus_1       = avctx->width - 1;
+    fh->frame_height_minus_1      = avctx->height - 1;
+    fh->render_width_minus_1      = fh->frame_width_minus_1;
+    fh->render_height_minus_1     = fh->frame_height_minus_1;
+    /** order hints are counted relative to the last IDR picture. */
+    fh->order_hint                = pic->display_order - hpic->last_idr_frame;
+    fh->tile_cols                 = priv->tile_cols;
+    fh->tile_rows                 = priv->tile_rows;
+    fh->tile_cols_log2            = priv->tile_cols_log2;
+    fh->tile_rows_log2            = priv->tile_rows_log2;
+    fh->uniform_tile_spacing_flag = priv->uniform_tile;
+    fh->tile_size_bytes_minus1    = priv->attr_ext2.bits.tile_size_bytes_minus1;
+    fh->reduced_tx_set            = 1;
+
+    /** ignore ONLY_4x4 mode for codedlossless is not fully implemented. */
+    if (priv->attr_ext2.bits.tx_mode_support & 0x04)
+        fh->tx_mode = AV1_TX_MODE_SELECT;
+    else if (priv->attr_ext2.bits.tx_mode_support & 0x02)
+        fh->tx_mode = AV1_TX_MODE_LARGEST;
+    else {
+        av_log(avctx, AV_LOG_ERROR, "Driver supports neither TX_MODE_SELECT "
+               "nor TX_MODE_LARGEST.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /** copy the tile layout into both the bitstream header and the VA buffer. */
+    for (i = 0; i < fh->tile_cols; i++)
+        fh->width_in_sbs_minus_1[i] = vpic->width_in_sbs_minus_1[i] = priv->width_in_sbs_minus_1[i];
+
+    for (i = 0; i < fh->tile_rows; i++)
+        fh->height_in_sbs_minus_1[i] = vpic->height_in_sbs_minus_1[i] = priv->height_in_sbs_minus_1[i];
+
+    memcpy(fh->loop_filter_ref_deltas, default_loop_filter_ref_deltas,
+           AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t));
+
+    if (fh->frame_type == AV1_FRAME_KEY && fh->show_frame) {
+        fh->error_resilient_mode = 1;
+    }
+
+    if (fh->frame_type == AV1_FRAME_KEY || fh->error_resilient_mode)
+        fh->primary_ref_frame = AV1_PRIMARY_REF_NONE;
+
+    vpic->base_qindex          = fh->base_q_idx;
+    vpic->frame_width_minus_1  = fh->frame_width_minus_1;
+    vpic->frame_height_minus_1 = fh->frame_height_minus_1;
+    vpic->primary_ref_frame    = fh->primary_ref_frame;
+    vpic->reconstructed_frame  = pic->recon_surface;
+    vpic->coded_buf            = pic->output_buffer;
+    vpic->tile_cols            = fh->tile_cols;
+    vpic->tile_rows            = fh->tile_rows;
+    vpic->order_hint           = fh->order_hint;
+#if VA_CHECK_VERSION(1, 15, 0)
+    vpic->refresh_frame_flags  = fh->refresh_frame_flags;
+#endif
+
+    vpic->picture_flags.bits.enable_frame_obu     = 0;
+    vpic->picture_flags.bits.frame_type           = fh->frame_type;
+    vpic->picture_flags.bits.reduced_tx_set       = fh->reduced_tx_set;
+    vpic->picture_flags.bits.error_resilient_mode = fh->error_resilient_mode;
+
+    /** let driver decide to use single or compound reference prediction mode. */
+    vpic->mode_control_flags.bits.reference_mode = fh->reference_select ? 2 : 0;
+    vpic->mode_control_flags.bits.tx_mode = fh->tx_mode;
+
+    vpic->tile_group_obu_hdr_info.bits.obu_has_size_field = 1;
+
+    /** set reference. */
+    for (i = 0; i < AV1_REFS_PER_FRAME; i++)
+        vpic->ref_frame_idx[i] = fh->ref_frame_idx[i];
+
+    for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
+        vpic->reference_frames[i] = VA_INVALID_SURFACE;
+
+    /** each reference occupies its own slot; the assert catches two references sharing one. */
+    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+        for (int j = 0; j < pic->nb_refs[i]; j++) {
+            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
+
+            slot = ((VAAPIEncodeAV1Picture*)ref_pic->priv_data)->slot;
+            av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
+
+            vpic->reference_frames[slot] = ref_pic->recon_surface;
+        }
+    }
+
+    /** pack frame header, and set va params offset like bit_offset_qindex etc. */
+    ret = vaapi_encode_av1_write_frame_header(avctx, pic, priv->fh_data, &priv->fh_data_len);
+
+    ff_cbs_fragment_reset(obu);
+    return ret;
+}
+
+/**
+ * Fill the VAAPI tile group buffer for one slice (tile group).
+ *
+ * The tiles are divided evenly between the tile groups using integer
+ * division; the last group additionally takes whatever tiles remain. When
+ * the last group is set up, the seen_frame_header flag of the CBS AV1
+ * context is cleared.
+ *
+ * @param avctx codec context
+ * @param pic   picture the slice belongs to (not used here)
+ * @param slice slice to set up; slice->index selects the tile group
+ * @return always 0
+ */
+static int vaapi_encode_av1_init_slice_params(AVCodecContext *avctx,
+                                              VAAPIEncodePicture *pic,
+                                              VAAPIEncodeSlice *slice)
+{
+    VAAPIEncodeAV1Context       *av1 = avctx->priv_data;
+    VAEncTileGroupBufferAV1      *tg = slice->codec_slice_params;
+    CodedBitstreamAV1Context *cbs_av1 = av1->cbc->priv_data;
+    const int total_tiles     = av1->tile_cols * av1->tile_rows;
+    const int tiles_per_group = total_tiles / av1->tile_groups;
+    const int is_last_group   = slice->index == (av1->tile_groups - 1);
+
+    tg->tg_start = slice->index * tiles_per_group;
+
+    if (!is_last_group) {
+        tg->tg_end = (slice->index + 1) * tiles_per_group - 1;
+        return 0;
+    }
+
+    /** last group: run up to the final tile and reset the header tracking. */
+    tg->tg_end = total_tiles - 1;
+    cbs_av1->seen_frame_header = 0;
+
+    return 0;
+}
+
+/**
+ * Hand the packed frame header of a picture to the caller.
+ *
+ * For a picture that is displayed later than it is encoded, a second frame
+ * header with show_existing_frame set is packed into pic->tail_data first,
+ * and pic->tail_size is converted from the bit count reported by the packer
+ * to bytes. Afterwards the frame header cached in priv->fh_data is copied
+ * out.
+ *
+ * @param avctx    codec context
+ * @param pic      picture whose header is requested
+ * @param data     destination buffer, must hold MAX_PARAM_BUFFER_SIZE bytes
+ * @param data_len receives priv->fh_data_len
+ * @return 0 on success, or the negative error code from packing the repeat
+ *         header (in which case data and data_len are left untouched)
+ */
+static int vaapi_encode_av1_write_picture_header(AVCodecContext *avctx,
+                                                 VAAPIEncodePicture *pic,
+                                                 char *data, size_t *data_len)
+{
+    VAAPIEncodeAV1Context   *av1 = avctx->priv_data;
+    CodedBitstreamFragment *frag = &av1->current_obu;
+    AV1RawOBU           *raw_obu = &av1->fh;
+    AV1RawFrameHeader    *repeat = &raw_obu->obu.frame_header;
+    const int needs_repeat_header = pic->display_order > pic->encode_order;
+    int ret = 0;
+
+    pic->tail_size = 0;
+
+    /** Pack repeat frame header. */
+    if (needs_repeat_header) {
+        VAAPIEncodeAV1Picture *last_l0;
+
+        memset(raw_obu, 0, sizeof(*raw_obu));
+        last_l0 = pic->refs[0][pic->nb_refs[0] - 1]->priv_data;
+
+        raw_obu->header.obu_type           = AV1_OBU_FRAME_HEADER;
+        raw_obu->header.obu_has_size_field = 1;
+
+        repeat->show_existing_frame   = 1;
+        /** index 1 when the nearest L0 reference sits in slot 0, index 0 otherwise. */
+        repeat->frame_to_show_map_idx = last_l0->slot == 0;
+        repeat->frame_type            = AV1_FRAME_INTER;
+        repeat->frame_width_minus_1   = avctx->width - 1;
+        repeat->frame_height_minus_1  = avctx->height - 1;
+        repeat->render_width_minus_1  = repeat->frame_width_minus_1;
+        repeat->render_height_minus_1 = repeat->frame_height_minus_1;
+
+        ret = vaapi_encode_av1_write_frame_header(avctx, pic, pic->tail_data, &pic->tail_size);
+        if (ret >= 0)
+            pic->tail_size /= 8;
+    }
+
+    if (ret >= 0) {
+        memcpy(data, &av1->fh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
+        *data_len = av1->fh_data_len;
+    }
+
+    ff_cbs_fragment_reset(frag);
+    return ret;
+}
+
+/**
+ * Profiles offered by this encoder: both entries map FF_PROFILE_AV1_MAIN to
+ * VAProfileAV1Profile0 and differ only in the second field (8 vs. 10). The
+ * list is terminated by an FF_PROFILE_UNKNOWN entry.
+ *
+ * NOTE(review): the positional fields presumably are bit depth, component
+ * count and the two chroma subsampling shifts -- confirm against the
+ * VAAPIEncodeProfile definition.
+ */
+static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
+    { FF_PROFILE_AV1_MAIN,  8, 3, 1, 1, VAProfileAV1Profile0 },
+    { FF_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
+    { FF_PROFILE_UNKNOWN }
+};
+
+/**
+ * Codec description handed to the common VAAPI encode layer: supported
+ * profiles, feature flags, default quality, and the AV1 callbacks together
+ * with the sizes of the parameter buffers they fill.
+ */
+static const VAAPIEncodeType vaapi_encode_type_av1 = {
+    /** general properties */
+    .profiles        = vaapi_encode_av1_profiles,
+    .flags           = FLAG_B_PICTURES,
+    .default_quality = 25,
+    .configure       = vaapi_encode_av1_configure,
+
+    /** sequence level */
+    .sequence_params_size  = sizeof(VAEncSequenceParameterBufferAV1),
+    .init_sequence_params  = vaapi_encode_av1_init_sequence_params,
+    .sequence_header_type  = VAEncPackedHeaderSequence,
+    .write_sequence_header = vaapi_encode_av1_write_sequence_header,
+
+    /** picture level */
+    .picture_priv_data_size = sizeof(VAAPIEncodeAV1Picture),
+    .picture_params_size    = sizeof(VAEncPictureParameterBufferAV1),
+    .init_picture_params    = vaapi_encode_av1_init_picture_params,
+    .picture_header_type    = VAEncPackedHeaderPicture,
+    .write_picture_header   = vaapi_encode_av1_write_picture_header,
+
+    /** slice (tile group) level */
+    .slice_params_size = sizeof(VAEncTileGroupBufferAV1),
+    .init_slice_params = vaapi_encode_av1_init_slice_params,
+};
+
+/**
+ * Query one config attribute for the profile/entrypoint selected by the
+ * common layer.
+ *
+ * @param avctx codec context
+ * @param attr  attribute to query; attr->type selects it, attr->value
+ *              receives the result
+ * @return 0 if the query call succeeded (the value may still be
+ *         VA_ATTRIB_NOT_SUPPORTED), AVERROR_EXTERNAL if the call failed
+ */
+static av_cold int vaapi_encode_av1_query_attr(AVCodecContext *avctx,
+                                               VAConfigAttrib *attr)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query "
+               "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Encoder init callback.
+ *
+ * Selects the AV1 codec description, resolves profile and level from the
+ * private options when the generic ones are unset, runs the common VAAPI
+ * initialisation, reads the three AV1 encode attributes from the driver and
+ * finally validates the tile layout.
+ *
+ * The first two attributes are optional: when unsupported they are stored
+ * as 0 and a warning is logged. VAConfigAttribEncAV1Ext2 is mandatory.
+ *
+ * @param avctx codec context
+ * @return 0 on success, AVERROR(EINVAL) for an invalid level,
+ *         AVERROR_EXTERNAL if an attribute query fails or the mandatory
+ *         attribute is unsupported, or the error of a failing sub-step
+ */
+static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext      *ctx = avctx->priv_data;
+    VAAPIEncodeAV1Context  *priv = avctx->priv_data;
+    VAConfigAttrib attr;
+    int ret;
+
+    ctx->codec = &vaapi_encode_type_av1;
+
+    ctx->desired_packed_headers =
+        VA_ENC_PACKED_HEADER_SEQUENCE |
+        VA_ENC_PACKED_HEADER_PICTURE;
+
+    if (avctx->profile == FF_PROFILE_UNKNOWN)
+        avctx->profile = priv->profile;
+    if (avctx->level == FF_LEVEL_UNKNOWN)
+        avctx->level = priv->level;
+
+    /** a known level must not have bits set above the low five. */
+    if (avctx->level != FF_LEVEL_UNKNOWN && avctx->level & ~0x1f) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid level %d\n", avctx->level);
+        return AVERROR(EINVAL);
+    }
+
+    ret = ff_vaapi_encode_init(avctx);
+    if (ret < 0)
+        return ret;
+
+    attr.type = VAConfigAttribEncAV1;
+    ret = vaapi_encode_av1_query_attr(avctx, &attr);
+    if (ret < 0)
+        return ret;
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        priv->attr.value = 0;
+        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
+               "supported.\n", attr.type);
+    } else {
+        priv->attr.value = attr.value;
+    }
+
+    attr.type = VAConfigAttribEncAV1Ext1;
+    ret = vaapi_encode_av1_query_attr(avctx, &attr);
+    if (ret < 0)
+        return ret;
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        priv->attr_ext1.value = 0;
+        av_log(avctx, AV_LOG_WARNING, "Attribute type:%d is not "
+               "supported.\n", attr.type);
+    } else {
+        priv->attr_ext1.value = attr.value;
+    }
+
+    /** This attr provides essential indicators, return error if not support. */
+    attr.type = VAConfigAttribEncAV1Ext2;
+    ret = vaapi_encode_av1_query_attr(avctx, &attr);
+    if (ret < 0)
+        return ret;
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        /**
+         * The query itself succeeded, so report the missing attribute
+         * instead of printing a "failed to query" message with a success
+         * status.
+         */
+        av_log(avctx, AV_LOG_ERROR, "Required attribute type:%d is not "
+               "supported.\n", attr.type);
+        return AVERROR_EXTERNAL;
+    }
+    priv->attr_ext2.value = attr.value;
+
+    ret = vaapi_encode_av1_set_tile(avctx);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+/**
+ * Encoder close callback: release the scratch OBU fragment and the coded
+ * bitstream context, then run the common VAAPI teardown.
+ *
+ * @param avctx codec context
+ * @return the result of ff_vaapi_encode_close()
+ */
+static av_cold int vaapi_encode_av1_close(AVCodecContext *avctx)
+{
+    VAAPIEncodeAV1Context *av1 = avctx->priv_data;
+    int ret;
+
+    ff_cbs_fragment_free(&av1->current_obu);
+    ff_cbs_close(&av1->cbc);
+
+    ret = ff_vaapi_encode_close(avctx);
+    return ret;
+}
+
+/** Byte offset of an option field inside the private context. */
+#define OFFSET(x) offsetof(VAAPIEncodeAV1Context, x)
+/** Flags shared by every option below: video, encoding. */
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+
+/**
+ * Private options of the encoder: the common VAAPI and rate control option
+ * sets, followed by profile, tier, level, tile layout and tile group count.
+ */
+static const AVOption vaapi_encode_av1_options[] = {
+    VAAPI_ENCODE_COMMON_OPTIONS,
+    VAAPI_ENCODE_RC_OPTIONS,
+    { "profile", "Set profile (seq_profile)",
+      OFFSET(profile), AV_OPT_TYPE_INT,
+      { .i64 = FF_PROFILE_UNKNOWN }, FF_PROFILE_UNKNOWN, 0xff, FLAGS, "profile" },
+
+/** Named constant belonging to the "profile" option unit. */
+#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
+    { .i64 = value }, 0, 0, FLAGS, "profile"
+    { PROFILE("main",               FF_PROFILE_AV1_MAIN) },
+    { PROFILE("high",               FF_PROFILE_AV1_HIGH) },
+    { PROFILE("professional",       FF_PROFILE_AV1_PROFESSIONAL) },
+#undef PROFILE
+
+    /** tier: 0 ("main") or 1 ("high"), default 0. */
+    { "tier", "Set tier (seq_tier)",
+      OFFSET(tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "tier" },
+    { "main", NULL, 0, AV_OPT_TYPE_CONST,
+      { .i64 = 0 }, 0, 0, FLAGS, "tier" },
+    { "high", NULL, 0, AV_OPT_TYPE_CONST,
+      { .i64 = 1 }, 0, 0, FLAGS, "tier" },
+    /** level: stored as a seq_level_idx value, unknown by default. */
+    { "level", "Set level (seq_level_idx)",
+      OFFSET(level), AV_OPT_TYPE_INT,
+      { .i64 = FF_LEVEL_UNKNOWN }, FF_LEVEL_UNKNOWN, 0x1f, FLAGS, "level" },
+
+/** Named constant belonging to the "level" option unit: level name -> seq_level_idx. */
+#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, FLAGS, "level"
+    { LEVEL("2.0",  0) },
+    { LEVEL("2.1",  1) },
+    { LEVEL("3.0",  4) },
+    { LEVEL("3.1",  5) },
+    { LEVEL("4.0",  8) },
+    { LEVEL("4.1",  9) },
+    { LEVEL("5.0", 12) },
+    { LEVEL("5.1", 13) },
+    { LEVEL("5.2", 14) },
+    { LEVEL("5.3", 15) },
+    { LEVEL("6.0", 16) },
+    { LEVEL("6.1", 17) },
+    { LEVEL("6.2", 18) },
+    { LEVEL("6.3", 19) },
+#undef LEVEL
+
+    /**
+     * "tiles" is parsed as an image size written at OFFSET(tile_cols).
+     * NOTE(review): this presumably relies on tile_rows directly following
+     * tile_cols in VAAPIEncodeAV1Context -- confirm against the struct.
+     */
+    { "tiles", "Tile columns x rows",
+      OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1x1" }, 1, AV1_MAX_TILE_COLS, FLAGS },
+    { "tile_groups", "Number of tile groups for encoding",
+      OFFSET(tile_groups), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, AV1_MAX_TILE_ROWS * AV1_MAX_TILE_COLS, FLAGS },
+
+    { NULL },
+};
+
+/*
+ * Encoder-specific default values, given as option-name / value string
+ * pairs. The list is terminated by an entry whose key is NULL.
+ */
+static const FFCodecDefault vaapi_encode_av1_defaults[] = {
+    { .key = "b",    .value = "0"   },
+    { .key = "bf",   .value = "7"   },
+    { .key = "g",    .value = "120" },
+    { .key = "qmin", .value = "1"   },
+    { .key = "qmax", .value = "255" },
+    { .key = NULL },
+};
+
+/* AVClass that exposes vaapi_encode_av1_options for the av1_vaapi encoder. */
+static const AVClass vaapi_encode_av1_class = {
+    .version    = LIBAVUTIL_VERSION_INT,
+    .class_name = "av1_vaapi",
+    .item_name  = av_default_item_name,
+    .option     = vaapi_encode_av1_options,
+};
+
+/*
+ * Codec entry for the AV1 VAAPI encoder: ties together the init/close
+ * callbacks, the shared VAAPI receive_packet callback, the private option
+ * class and the default option values defined in this file.
+ */
+const FFCodec ff_av1_vaapi_encoder = {
+    /* Identification */
+    .p.name         = "av1_vaapi",
+    CODEC_LONG_NAME("AV1 (VAAPI)"),
+    .p.wrapper_name = "vaapi",
+    .p.type         = AVMEDIA_TYPE_VIDEO,
+    .p.id           = AV_CODEC_ID_AV1,
+
+    /* Private context, options and defaults */
+    .priv_data_size = sizeof(VAAPIEncodeAV1Context),
+    .p.priv_class   = &vaapi_encode_av1_class,
+    .defaults       = vaapi_encode_av1_defaults,
+
+    /* Callbacks */
+    .init           = vaapi_encode_av1_init,
+    FF_CODEC_RECEIVE_PACKET_CB(ff_vaapi_encode_receive_packet),
+    .close          = vaapi_encode_av1_close,
+
+    /* Capabilities */
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
+                      AV_CODEC_CAP_HARDWARE |
+                      AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
+                      FF_CODEC_CAP_NOT_INIT_THREADSAFE,
+
+    /* Only VAAPI surfaces are listed as input; AV_PIX_FMT_NONE ends the list. */
+    .p.pix_fmts     = (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_VAAPI,
+        AV_PIX_FMT_NONE,
+    },
+    .hw_configs     = ff_vaapi_encode_hw_configs,
+};