[FFmpeg-devel,v6,1/2] lavc/svt_hevc: add libsvt hevc encoder wrapper.

Submitted by Jing SUN on March 5, 2019, 7:43 a.m.

Details

Message ID 1551771805-17458-1-git-send-email-jing.a.sun@intel.com
State New
Headers show

Commit Message

Jing SUN March 5, 2019, 7:43 a.m.
From: Jing Sun <jing.a.sun@intel.com>

base on patch by Huang, Zhengxu from https://github.com/intel/SVT-HEVC

V4: - Fix the build error with new API in PR#52
    - Fix the encoding hang issue by API change in PR#52
    - Fix the last frame dropping issue
    - Fix the invalid parameter causing segmentation fault issue
    - Add the support to svt hevc and av1 plugins coexistance
    - Add the VMAF optimized mode to "-tune"
    - Add the "-hdr" parameter

V3: - Fix the build error with new API

V2: - Change the config options (didn't need to enable-gpl for BSD+Patent,
      it's can compatible with LGPL2+, thanks Xavier correct this part),
      now just need to "--enable-libsvthevc" option
    - Add force_idr option
    - Remove default GoP size setting in the wrapper, SVT-HEVC will calc
      the the GoP size internal
    - Refine the code as the FFmpeg community's comments
      (https://patchwork.ffmpeg.org/patch/11347/)

V1: - base on patch by Huang, Zhengxu, then refine some code.

Change-Id: If0dcc5044ab9effd6847a8f48797b985d02b0816
Signed-off-by: Huang, Zhengxu <zhengxu.huang@intel.com>
Signed-off-by: hassene <hassene.tmar@intel.com>
Signed-off-by: Jun Zhao <jun.zhao@intel.com>
Signed-off-by: Jing Sun <jing.a.sun@intel.com>
---
 configure                |   4 +
 libavcodec/Makefile      |   1 +
 libavcodec/allcodecs.c   |   1 +
 libavcodec/libsvt_hevc.c | 546 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 552 insertions(+)
 create mode 100644 libavcodec/libsvt_hevc.c

Comments

Vittorio Giovara March 5, 2019, 6:22 p.m.
On Tue, Mar 5, 2019 at 1:45 AM Jing SUN <jing.a.sun@intel.com> wrote:

> From: Jing Sun <jing.a.sun@intel.com>
>
> base on patch by Huang, Zhengxu from https://github.com/intel/SVT-HEVC
>
> V4: - Fix the build error with new API in PR#52
>     - Fix the encoding hang issue by API change in PR#52
>     - Fix the last frame dropping issue
>     - Fix the invalid parameter causing segmentation fault issue
>     - Add the support to svt hevc and av1 plugins coexistance
>     - Add the VMAF optimized mode to "-tune"
>     - Add the "-hdr" parameter
>

Is there any way to preserve the color matrix/primary/transfer properties
during encoding?
Mark Thompson March 5, 2019, 9:59 p.m.
On 05/03/2019 07:43, Jing SUN wrote:
> From: Jing Sun <jing.a.sun@intel.com>
> 
> base on patch by Huang, Zhengxu from https://github.com/intel/SVT-HEVC
> 
> V4: - Fix the build error with new API in PR#52
>     - Fix the encoding hang issue by API change in PR#52
>     - Fix the last frame dropping issue
>     - Fix the invalid parameter causing segmentation fault issue
>     - Add the support to svt hevc and av1 plugins coexistance
>     - Add the VMAF optimized mode to "-tune"
>     - Add the "-hdr" parameter
> 
> V3: - Fix the build error with new API
> 
> V2: - Change the config options (didn't need to enable-gpl for BSD+Patent,
>       it's can compatible with LGPL2+, thanks Xavier correct this part),
>       now just need to "--enable-libsvthevc" option
>     - Add force_idr option
>     - Remove default GoP size setting in the wrapper, SVT-HEVC will calc
>       the the GoP size internal
>     - Refine the code as the FFmpeg community's comments
>       (https://patchwork.ffmpeg.org/patch/11347/)
> 
> V1: - base on patch by Huang, Zhengxu, then refine some code.
> 
> Change-Id: If0dcc5044ab9effd6847a8f48797b985d02b0816
> Signed-off-by: Huang, Zhengxu <zhengxu.huang@intel.com>
> Signed-off-by: hassene <hassene.tmar@intel.com>
> Signed-off-by: Jun Zhao <jun.zhao@intel.com>
> Signed-off-by: Jing Sun <jing.a.sun@intel.com>
> ---
>  configure                |   4 +
>  libavcodec/Makefile      |   1 +
>  libavcodec/allcodecs.c   |   1 +
>  libavcodec/libsvt_hevc.c | 546 +++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 552 insertions(+)
>  create mode 100644 libavcodec/libsvt_hevc.c
> 
> ...
> 
> diff --git a/libavcodec/libsvt_hevc.c b/libavcodec/libsvt_hevc.c
> new file mode 100644
> index 0000000..97bd204
> --- /dev/null
> +++ b/libavcodec/libsvt_hevc.c
> @@ -0,0 +1,546 @@
> +/*
> +* Scalable Video Technology for HEVC encoder library plugin
> +*
> +* Copyright (c) 2018 Intel Corporation
> +*
> +* This file is part of FFmpeg.
> +*
> +* FFmpeg is free software; you can redistribute it and/or
> +* modify it under the terms of the GNU Lesser General Public
> +* License as published by the Free Software Foundation; either
> +* version 2.1 of the License, or (at your option) any later version.
> +*
> +* FFmpeg is distributed in the hope that it will be useful,
> +* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +* Lesser General Public License for more details.
> +*
> +* You should have received a copy of the GNU Lesser General Public
> +* License along with this program; if not, write to the Free Software
> +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +*/
> +
> +#include "svt-hevc/EbErrorCodes.h"
> +#include "svt-hevc/EbTime.h"
> +#include "svt-hevc/EbApi.h"
> +
> +#include "libavutil/common.h"
> +#include "libavutil/frame.h"
> +#include "libavutil/opt.h"
> +
> +#include "internal.h"
> +#include "avcodec.h"
> +
> +typedef enum eos_status {
> +    EOS_NOT_REACHED = 0,
> +    EOS_REACHED,
> +    EOS_TOTRIGGER
> +}EOS_STATUS;
> +
> +typedef struct SvtContext {
> +    AVClass     *class;
> +
> +    EB_H265_ENC_CONFIGURATION  enc_params;
> +    EB_COMPONENTTYPE           *svt_handle;
> +
> +    EB_BUFFERHEADERTYPE        *in_buf;

This structure appears have exactly the same lifetime as the encoder itself - can you just put it directly in the SvtContext rather than allocating/freeing it separately?

> +    int                         raw_size;

This variable seems to be write-only.

> +
> +    EOS_STATUS eos_flag;
> +
> +    // User options.
> +    int vui_info;
> +    int hierarchical_level;
> +    int la_depth;
> +    int enc_mode;
> +    int rc_mode;
> +    int scd;
> +    int tune;
> +    int qp;
> +    int hdr;
> +
> +    int forced_idr;
> +
> +    int aud;
> +
> +    int profile;
> +    int tier;
> +    int level;
> +
> +    int base_layer_switch_mode;
> +} SvtContext;
> +
> +static int error_mapping(EB_ERRORTYPE svt_ret)
> +{
> +    int err;
> +
> +    switch (svt_ret) {
> +    case EB_ErrorInsufficientResources:
> +        err = AVERROR(ENOMEM);
> +        break;
> +
> +    case EB_ErrorUndefined:
> +    case EB_ErrorInvalidComponent:
> +    case EB_ErrorBadParameter:
> +        err = AVERROR(EINVAL);
> +        break;
> +
> +    case EB_ErrorDestroyThreadFailed:
> +    case EB_ErrorSemaphoreUnresponsive:
> +    case EB_ErrorDestroySemaphoreFailed:
> +    case EB_ErrorCreateMutexFailed:
> +    case EB_ErrorMutexUnresponsive:
> +    case EB_ErrorDestroyMutexFailed:
> +        err = AVERROR_EXTERNAL;
> +            break;
> +
> +    case EB_NoErrorEmptyQueue:
> +        err = AVERROR(EAGAIN);
> +
> +    case EB_ErrorNone:
> +        err = 0;
> +        break;
> +
> +    default:
> +        err = AVERROR_UNKNOWN;
> +    }
> +
> +    return err;
> +}
> +
> +static void free_buffer(SvtContext *svt_enc)
> +{
> +    if (svt_enc->in_buf) {
> +        EB_H265_ENC_INPUT *in_data = (EB_H265_ENC_INPUT *)svt_enc->in_buf->pBuffer;
> +        av_freep(&in_data);
> +        av_freep(&svt_enc->in_buf);
> +    }
> +}
> +
> +static int alloc_buffer(EB_H265_ENC_CONFIGURATION *config, SvtContext *svt_enc)
> +{
> +    const int    pack_mode_10bit   =
> +        (config->encoderBitDepth > 8) && (config->compressedTenBitFormat == 0) ? 1 : 0;
> +    const size_t luma_size_8bit    =
> +        config->sourceWidth * config->sourceHeight * (1 << pack_mode_10bit);
> +    const size_t luma_size_10bit   =
> +        (config->encoderBitDepth > 8 && pack_mode_10bit == 0) ? luma_size_8bit : 0;
> +
> +    EB_H265_ENC_INPUT *in_data;
> +
> +    svt_enc->raw_size = (luma_size_8bit + luma_size_10bit) * 3 / 2;

This isn't used anywhere.

> +    // allocate buffer for in and out
> +    svt_enc->in_buf           = av_mallocz(sizeof(*svt_enc->in_buf));
> +    if (!svt_enc->in_buf)
> +        goto failed;
> +
> +    in_data  = av_mallocz(sizeof(*in_data));
> +    if (!in_data)
> +        goto failed;
> +    svt_enc->in_buf->pBuffer  = (unsigned char *)in_data;
> +
> +    svt_enc->in_buf->nSize        = sizeof(*svt_enc->in_buf);
> +    svt_enc->in_buf->pAppPrivate  = NULL;

The EB_H265_ENC_INPUT structure also looks like it has the same lifetime as the encoder, so should be in the context structure?

> +
> +    return 0;
> +
> +failed:
> +    free_buffer(svt_enc);
> +    return AVERROR(ENOMEM);
> +}
> +
> +static int config_enc_params(EB_H265_ENC_CONFIGURATION *param,
> +                             AVCodecContext *avctx)
> +{
> +    SvtContext *svt_enc = avctx->priv_data;
> +    int             ret;
> +    int        ten_bits = 0;
> +
> +    param->sourceWidth     = avctx->width;
> +    param->sourceHeight    = avctx->height;
> +
> +    if (avctx->pix_fmt == AV_PIX_FMT_YUV420P10LE) {
> +        av_log(avctx, AV_LOG_DEBUG , "Encoder 10 bits depth input\n");
> +        // Disable Compressed 10-bit format default
> +        //
> +        // SVT-HEVC support a compressed 10-bit format allowing the
> +        // software to achieve a higher speed and channel density levels.
> +        // The conversion between the 10-bit yuv420p10le and the compressed
> +        // 10-bit format is a lossless operation. But in FFmpeg, we usually
> +        // didn't use this format

If it improves speed beyond some minor constant factor for an immediate conversion in the encoder itself then should we in fact be doing this conversion here?

> +        param->compressedTenBitFormat = 0;
> +        ten_bits = 1;
> +    }
> +
> +    // Update param from options
> +    param->hierarchicalLevels     = svt_enc->hierarchical_level - 1;
> +    param->encMode                = svt_enc->enc_mode;
> +    param->profile                = svt_enc->profile;
> +    param->tier                   = svt_enc->tier;
> +    param->level                  = svt_enc->level;
> +    param->rateControlMode        = svt_enc->rc_mode;
> +    param->sceneChangeDetection   = svt_enc->scd;
> +    param->tune                   = svt_enc->tune;
> +    param->baseLayerSwitchMode    = svt_enc->base_layer_switch_mode;
> +    param->qp                     = svt_enc->qp;
> +    param->accessUnitDelimiter    = svt_enc->aud;
> +
> +    param->targetBitRate          = avctx->bit_rate;
> +    if (avctx->gop_size > 0)
> +        param->intraPeriodLength  = avctx->gop_size - 1;
> +
> +    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
> +        param->frameRateNumerator     = avctx->framerate.num;
> +        param->frameRateDenominator   = avctx->framerate.den * avctx->ticks_per_frame;
> +    } else {
> +        param->frameRateNumerator     = avctx->time_base.den;
> +        param->frameRateDenominator   = avctx->time_base.num * avctx->ticks_per_frame;
> +    }
> +
> +    if (param->rateControlMode) {
> +        param->maxQpAllowed       = avctx->qmax;
> +        param->minQpAllowed       = avctx->qmin;
> +    }
> +
> +    param->intraRefreshType       =
> +        !!(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) + 1;
> +
> +    // is it repeat headers for MP4 or Annex-b
> +    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)
> +        param->codeVpsSpsPps          = 0;
> +    else
> +        param->codeVpsSpsPps          = 1;
> +
> +    param->codeEosNal             = 1;
> +
> +    if (svt_enc->hdr) {
> +        svt_enc->vui_info = 1;
> +        param->highDynamicRangeInput = svt_enc->hdr;

Doesn't this need more parameters to do anything?  The colourspace information at least, maybe the light level and display stuff as well (depending on which sort of HDR you are supporting).

> +    }
> +
> +    if (svt_enc->vui_info)
> +        param->videoUsabilityInfo = svt_enc->vui_info;
> +
> +    if (svt_enc->la_depth != -1)
> +        param->lookAheadDistance  = svt_enc->la_depth;
> +
> +    if (ten_bits) {
> +        param->encoderBitDepth        = 10;
> +    }
> +
> +    ret = alloc_buffer(param, svt_enc);
> +
> +    return ret;
> +}
> +
> +static void read_in_data(EB_H265_ENC_CONFIGURATION *config,
> +                         const AVFrame *frame,
> +                         EB_BUFFERHEADERTYPE *headerPtr)
> +{
> +    uint8_t is16bit = config->encoderBitDepth > 8;
> +    uint64_t luma_size =
> +        (uint64_t)config->sourceWidth * config->sourceHeight<< is16bit;
> +    EB_H265_ENC_INPUT *in_data = (EB_H265_ENC_INPUT *)headerPtr->pBuffer;
> +
> +    // support yuv420p and yuv420p010
> +    in_data->luma = frame->data[0];
> +    in_data->cb   = frame->data[1];
> +    in_data->cr   = frame->data[2];
> +
> +    // stride info
> +    in_data->yStride  = frame->linesize[0] >> is16bit;
> +    in_data->cbStride = frame->linesize[1] >> is16bit;
> +    in_data->crStride = frame->linesize[2] >> is16bit;
> +
> +    headerPtr->nFilledLen   += luma_size * 3/2u;

What does this value actually do?  Since the planes need not be in the same buffer, it doesn't seem likely to be meaningful.

> +}
> +
> +static av_cold int eb_enc_init(AVCodecContext *avctx)
> +{
> +    SvtContext   *svt_enc = avctx->priv_data;
> +    EB_ERRORTYPE svt_ret;
> +
> +    svt_enc->eos_flag = EOS_NOT_REACHED;
> +
> +    svt_ret = EbInitHandle(&svt_enc->svt_handle, svt_enc, &svt_enc->enc_params);
> +    if (svt_ret != EB_ErrorNone) {
> +        av_log(avctx, AV_LOG_ERROR, "Error init encoder handle\n");
> +        goto failed;
> +    }
> +
> +    svt_ret = config_enc_params(&svt_enc->enc_params, avctx);
> +    if (svt_ret != EB_ErrorNone) {
> +        av_log(avctx, AV_LOG_ERROR, "Error configure encoder parameters\n");
> +        goto failed_init_handle;
> +    }
> +
> +    svt_ret = EbH265EncSetParameter(svt_enc->svt_handle, &svt_enc->enc_params);
> +    if (svt_ret != EB_ErrorNone) {
> +        av_log(avctx, AV_LOG_ERROR, "Error setting encoder parameters\n");
> +        goto failed_init_handle;
> +    }
> +
> +    svt_ret = EbInitEncoder(svt_enc->svt_handle);
> +    if (svt_ret != EB_ErrorNone) {
> +        av_log(avctx, AV_LOG_ERROR, "Error init encoder\n");
> +        goto failed_init_handle;
> +    }
> +
> +    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
> +        EB_BUFFERHEADERTYPE *headerPtr = NULL;

Please avoid using camelCase unless required by the API.

> +
> +        svt_ret = EbH265EncStreamHeader(svt_enc->svt_handle, &headerPtr);
> +        if (svt_ret != EB_ErrorNone) {
> +            av_log(avctx, AV_LOG_ERROR, "Error when build stream header.\n");
> +            goto failed_init_enc;
> +        }
> +
> +        avctx->extradata_size = headerPtr->nFilledLen;
> +        avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
> +        if (!avctx->extradata) {
> +            av_log(avctx, AV_LOG_ERROR,
> +                   "Cannot allocate HEVC header of size %d.\n", avctx->extradata_size);
> +            svt_ret = EB_ErrorInsufficientResources;
> +            goto failed_init_enc;
> +        }
> +        memcpy(avctx->extradata, headerPtr->pBuffer, avctx->extradata_size);
> +    }
> +
> +    return 0;
> +
> +failed_init_enc:
> +    EbDeinitEncoder(svt_enc->svt_handle);
> +failed_init_handle:
> +    EbDeinitHandle(svt_enc->svt_handle);
> +failed:
> +    free_buffer(svt_enc);
> +    svt_enc->svt_handle = NULL;
> +    svt_enc = NULL;
> +    return error_mapping(svt_ret);
> +}
> +
> +static int eb_send_frame(AVCodecContext *avctx, const AVFrame *frame)
> +{
> +    SvtContext           *svt_enc = avctx->priv_data;
> +    EB_BUFFERHEADERTYPE  *headerPtr = svt_enc->in_buf;
> +
> +    if (!frame) {
> +        EB_BUFFERHEADERTYPE headerPtrLast;
> +        headerPtrLast.nAllocLen   = 0;
> +        headerPtrLast.nFilledLen  = 0;
> +        headerPtrLast.nTickCount  = 0;
> +        headerPtrLast.pAppPrivate = NULL;
> +        headerPtrLast.pBuffer     = NULL;
> +        headerPtrLast.nFlags      = EB_BUFFERFLAG_EOS;
> +
> +        EbH265EncSendPicture(svt_enc->svt_handle, &headerPtrLast);

Can this call ever fail?  The API indicates it returns an error code.

> +        svt_enc->eos_flag = EOS_REACHED;
> +        av_log(avctx, AV_LOG_DEBUG, "Finish sending frames!!!\n");
> +        return 0;
> +    }
> +
> +    read_in_data(&svt_enc->enc_params, frame, headerPtr);
> +
> +    headerPtr->nFlags       = 0;
> +    headerPtr->pAppPrivate  = NULL;
> +    headerPtr->pts          = frame->pts;
> +    switch (frame->pict_type) {
> +    case AV_PICTURE_TYPE_I:
> +        headerPtr->sliceType = svt_enc->forced_idr > 0 ? EB_IDR_PICTURE : EB_I_PICTURE;
> +        break;
> +    case AV_PICTURE_TYPE_P:
> +        headerPtr->sliceType = EB_P_PICTURE;
> +        break;
> +    case AV_PICTURE_TYPE_B:
> +        headerPtr->sliceType = EB_B_PICTURE;
> +        break;
> +    default:
> +        headerPtr->sliceType = EB_INVALID_PICTURE;
> +        break;
> +    }
> +    EbH265EncSendPicture(svt_enc->svt_handle, headerPtr);

Same comment as above about failure.

With the current logic it looks like this API call is entirely synchronous, so in the case of lookahead the input will immediately be copied somewhere else inside the encoder so that the input frame can be discarded on return.  Is that correct?

> +
> +    return 0;
> +}
> +
> +static int eb_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> +{
> +    SvtContext  *svt_enc = avctx->priv_data;
> +    EB_BUFFERHEADERTYPE   *headerPtr = NULL;
> +    EB_ERRORTYPE          svt_ret;
> +    int ret = 0;
> +
> +    if (EOS_TOTRIGGER == svt_enc->eos_flag) {
> +        pkt = NULL;
> +        return AVERROR_EOF;
> +    }
> +
> +    svt_ret = EbH265GetPacket(svt_enc->svt_handle, &headerPtr, svt_enc->eos_flag);
> +    if (svt_ret == EB_NoErrorEmptyQueue)
> +        return AVERROR(EAGAIN);
> +
> +    if ((ret = ff_alloc_packet2(avctx, pkt, headerPtr->nFilledLen, 0)) < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to allocate output packet.\n");
> +        EbH265ReleaseOutBuffer(&headerPtr);
> +        return ret;
> +    }
> +
> +    memcpy(pkt->data, headerPtr->pBuffer, headerPtr->nFilledLen);
> +
> +    pkt->size = headerPtr->nFilledLen;
> +    pkt->pts  = headerPtr->pts;
> +    pkt->dts  = headerPtr->dts;
> +    if (headerPtr->sliceType == EB_IDR_PICTURE)> +        pkt->flags |= AV_PKT_FLAG_KEY;

Should CRA pictures also be marked as key frames?

> +    if (headerPtr->sliceType == EB_NON_REF_PICTURE)
> +        pkt->flags |= AV_PKT_FLAG_DISPOSABLE;
> +
> +    EbH265ReleaseOutBuffer(&headerPtr);
> +
> +    if (EB_BUFFERFLAG_EOS == headerPtr->nFlags)
> +        svt_enc->eos_flag = EOS_TOTRIGGER;
> +
> +    return 0;
> +}
> +
> +static av_cold int eb_enc_close(AVCodecContext *avctx)
> +{
> +    SvtContext *svt_enc = avctx->priv_data;
> +
> +    if (svt_enc->svt_handle) {
> +        EbDeinitEncoder(svt_enc->svt_handle);
> +        EbDeinitHandle(svt_enc->svt_handle);
> +        svt_enc->svt_handle = NULL;
> +    }
> +
> +    if (svt_enc) {

If this test is false then you already invoked undefined behaviour by dereferencing the pointer above.

> +        free_buffer(svt_enc);
> +        svt_enc = NULL;
> +    }
> +
> +    return 0;
> +}
> +
> +#define OFFSET(x) offsetof(SvtContext, x)
> +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
> +static const AVOption options[] = {
> +    { "vui", "Enable vui info", OFFSET(vui_info),
> +      AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
> +
> +    { "aud", "Include AUD", OFFSET(aud),
> +      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
> +
> +    { "hielevel", "Hierarchical prediction levels setting", OFFSET(hierarchical_level),
> +      AV_OPT_TYPE_INT, { .i64 = 4 }, 1, 4, VE , "hielevel"},
> +        { "flat",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "hielevel" },
> +        { "2level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 },  INT_MIN, INT_MAX, VE, "hielevel" },
> +        { "3level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 3 },  INT_MIN, INT_MAX, VE, "hielevel" },
> +        { "4level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 4 },  INT_MIN, INT_MAX, VE, "hielevel" },
> +
> +    { "la_depth", "Look ahead distance [0, 256]", OFFSET(la_depth),
> +      AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 256, VE },
> +
> +    { "preset", "Encoding preset [0, 12] (e,g, for subjective quality tuning mode and >=4k resolution), [0, 10] (for >= 1080p resolution), [0, 9] (for all resolution and modes)",
> +      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = 9 }, 0, 12, VE },
> +
> +    { "profile", "Profile setting, Main Still Picture Profile not supported", OFFSET(profile),
> +      AV_OPT_TYPE_INT, { .i64 = FF_PROFILE_HEVC_MAIN_10 }, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10, VE, "profile"},
> +
> +#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
> +    { .i64 = value }, 0, 0, VE, "profile"
> +        { PROFILE("main",   FF_PROFILE_HEVC_MAIN)    },
> +        { PROFILE("main10", FF_PROFILE_HEVC_MAIN_10) },
> +#undef PROFILE
> +
> +    { "tier", "Set tier (general_tier_flag)", OFFSET(tier),
> +      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE, "tier" },
> +        { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "tier" },
> +        { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, VE, "tier" },
> +
> +    { "level", "Set level (level_idc)", OFFSET(level),
> +      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 0xff, VE, "level" },
> +
> +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
> +      { .i64 = value }, 0, 0, VE, "level"
> +        { LEVEL("1",   10) },
> +        { LEVEL("2",   20) },
> +        { LEVEL("2.1", 21) },
> +        { LEVEL("3",   30) },
> +        { LEVEL("3.1", 31) },
> +        { LEVEL("4",   40) },
> +        { LEVEL("4.1", 41) },
> +        { LEVEL("5",   50) },
> +        { LEVEL("5.1", 51) },
> +        { LEVEL("5.2", 52) },
> +        { LEVEL("6",   60) },
> +        { LEVEL("6.1", 61) },
> +        { LEVEL("6.2", 62) },

These numbers look suspicious.  Is it actually meant to match general_level_idc (which is level value * 30), or is it using the H.264 convention?  If the latter, please remove the reference to level_idc (because it's wrong) and add a comment explaining the values.

> +#undef LEVEL
> +
> +    { "rc", "Bit rate control mode", OFFSET(rc_mode),
> +      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE , "rc"},
> +        { "cqp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "rc" },
> +        { "vbr", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "rc" },

Is there an enum for this somewhere?

> +
> +    { "qp", "QP value for intra frames", OFFSET(qp),
> +      AV_OPT_TYPE_INT, { .i64 = 32 }, 0, 51, VE },

The use above suggests this applies to all frames in CQP mode, and maybe not at all in VBR mode.

Is the value mapped at all to make the real QP here?  That is, if I wanted to encode a 10-bit input at very high quality with QP -6, is that possible and what would I set this value to?

> +
> +    { "sc_detection", "Scene change detection", OFFSET(scd),
> +      AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
> +
> +    { "tune", "Quality tuning mode", OFFSET(tune), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, VE, "tune" },
> +        { "sq", "Visually optimized mode", 0,
> +          AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "tune" },
> +        { "oq",  "PSNR / SSIM optimized mode",  0,
> +          AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "tune" },
> +        { "vmaf", "VMAF optimized mode", 0,
> +          AV_OPT_TYPE_CONST, { .i64 = 2 },  INT_MIN, INT_MAX, VE, "tune" },

Is there an enum for this somewhere?

> +
> +    { "bl_mode", "Random Access Prediction Structure type setting", OFFSET(base_layer_switch_mode),
> +      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },

The help text here doesn't say anything about what the option actually does.

For bool variables, you generally want the help text to roughly fit in a sentence like "when you set this option, it will <help text>" (like "include AUD" or "enable VUI info" above).

> +
> +    { "forced-idr", "If forcing keyframes, force them as IDR frames.", OFFSET(forced_idr),
> +      AV_OPT_TYPE_BOOL,   { .i64 = 0 }, -1, 1, VE },
> +
> +    { "hdr", "High dynamic range input", OFFSET(hdr),
> +      AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, VE },

As noted above, this option needs more metadata somewhere.

> +
> +    {NULL},
> +};
> +
> +static const AVClass class = {
> +    .class_name = "libsvt_hevc",
> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const AVCodecDefault eb_enc_defaults[] = {
> +    { "b",         "7M"    },
> +    { "flags",     "+cgop" },
> +    { "qmin",      "10"    },
> +    { "qmax",      "48"    },
> +    { "g",         "-2"    },

What does GOP size -2 mean?

> +    { NULL },
> +};
> +
> +AVCodec ff_libsvt_hevc_encoder = {
> +    .name           = "libsvt_hevc",
> +    .long_name      = NULL_IF_CONFIG_SMALL("SVT-HEVC(Scalable Video Technology for HEVC) encoder"),
> +    .priv_data_size = sizeof(SvtContext),
> +    .type           = AVMEDIA_TYPE_VIDEO,
> +    .id             = AV_CODEC_ID_HEVC,
> +    .init           = eb_enc_init,
> +    .send_frame     = eb_send_frame,
> +    .receive_packet = eb_receive_packet,
> +    .close          = eb_enc_close,
> +    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
> +    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,
> +                                                    AV_PIX_FMT_YUV420P10,
> +                                                    AV_PIX_FMT_NONE },
> +    .priv_class     = &class,
> +    .defaults       = eb_enc_defaults,
> +    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
> +    .wrapper_name   = "libsvt_hevc",
> +};
> 

- Mark
Carl Eugen Hoyos March 6, 2019, 8:01 a.m.
2019-03-05 8:43 GMT+01:00, Jing SUN <jing.a.sun@intel.com>:

> +enabled libsvthevc        && require_pkg_config libsvthevc SvtHevcEnc
> svt-hevc/EbApi.h EbInitHandle

What funny optional dependencies does this library have that
justifies requiring pkg_config?

Carl Eugen
Jing SUN March 8, 2019, 9:37 a.m.
On 05/03/2019 07:43, Jing SUN wrote:
> From: Jing Sun <jing.a.sun@intel.com>

> 

> base on patch by Huang, Zhengxu from https://github.com/intel/SVT-HEVC

> 

> V4: - Fix the build error with new API in PR#52

>     - Fix the encoding hang issue by API change in PR#52

>     - Fix the last frame dropping issue

>     - Fix the invalid parameter causing segmentation fault issue

>     - Add the support to svt hevc and av1 plugins coexistance

>     - Add the VMAF optimized mode to "-tune"

>     - Add the "-hdr" parameter

> 

> V3: - Fix the build error with new API

> 

> V2: - Change the config options (didn't need to enable-gpl for BSD+Patent,

>       it's can compatible with LGPL2+, thanks Xavier correct this part),

>       now just need to "--enable-libsvthevc" option

>     - Add force_idr option

>     - Remove default GoP size setting in the wrapper, SVT-HEVC will calc

>       the the GoP size internal

>     - Refine the code as the FFmpeg community's comments

>       (https://patchwork.ffmpeg.org/patch/11347/)

> 

> V1: - base on patch by Huang, Zhengxu, then refine some code.

> 

> Change-Id: If0dcc5044ab9effd6847a8f48797b985d02b0816

> Signed-off-by: Huang, Zhengxu <zhengxu.huang@intel.com>

> Signed-off-by: hassene <hassene.tmar@intel.com>

> Signed-off-by: Jun Zhao <jun.zhao@intel.com>

> Signed-off-by: Jing Sun <jing.a.sun@intel.com>

> ---

>  configure                |   4 +

>  libavcodec/Makefile      |   1 +

>  libavcodec/allcodecs.c   |   1 +

>  libavcodec/libsvt_hevc.c | 546 

> +++++++++++++++++++++++++++++++++++++++++++++++

>  4 files changed, 552 insertions(+)

>  create mode 100644 libavcodec/libsvt_hevc.c

> 

> ...

> 

> diff --git a/libavcodec/libsvt_hevc.c b/libavcodec/libsvt_hevc.c new 

> file mode 100644 index 0000000..97bd204

> --- /dev/null

> +++ b/libavcodec/libsvt_hevc.c

> @@ -0,0 +1,546 @@

> +/*

> +* Scalable Video Technology for HEVC encoder library plugin

> +*

> +* Copyright (c) 2018 Intel Corporation

> +*

> +* This file is part of FFmpeg.

> +*

> +* FFmpeg is free software; you can redistribute it and/or

> +* modify it under the terms of the GNU Lesser General Public

> +* License as published by the Free Software Foundation; either

> +* version 2.1 of the License, or (at your option) any later version.

> +*

> +* FFmpeg is distributed in the hope that it will be useful,

> +* but WITHOUT ANY WARRANTY; without even the implied warranty of

> +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +* Lesser General Public License for more details.

> +*

> +* You should have received a copy of the GNU Lesser General Public

> +* License along with this program; if not, write to the Free Software

> +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 

> +02110-1301 USA */

> +

> +#include "svt-hevc/EbErrorCodes.h"

> +#include "svt-hevc/EbTime.h"

> +#include "svt-hevc/EbApi.h"

> +

> +#include "libavutil/common.h"

> +#include "libavutil/frame.h"

> +#include "libavutil/opt.h"

> +

> +#include "internal.h"

> +#include "avcodec.h"

> +

> +typedef enum eos_status {

> +    EOS_NOT_REACHED = 0,

> +    EOS_REACHED,

> +    EOS_TOTRIGGER

> +}EOS_STATUS;

> +

> +typedef struct SvtContext {

> +    AVClass     *class;

> +

> +    EB_H265_ENC_CONFIGURATION  enc_params;

> +    EB_COMPONENTTYPE           *svt_handle;

> +

> +    EB_BUFFERHEADERTYPE        *in_buf;


This structure appears have exactly the same lifetime as the encoder itself - can you just put it directly in the SvtContext rather than allocating/freeing it separately?
[SUN, Jing] To be modified in v7.

> +    int                         raw_size;


This variable seems to be write-only.
[SUN, Jing] To be modified in v7.

> +

> +    EOS_STATUS eos_flag;

> +

> +    // User options.

> +    int vui_info;

> +    int hierarchical_level;

> +    int la_depth;

> +    int enc_mode;

> +    int rc_mode;

> +    int scd;

> +    int tune;

> +    int qp;

> +    int hdr;

> +

> +    int forced_idr;

> +

> +    int aud;

> +

> +    int profile;

> +    int tier;

> +    int level;

> +

> +    int base_layer_switch_mode;

> +} SvtContext;

> +

> +static int error_mapping(EB_ERRORTYPE svt_ret) {

> +    int err;

> +

> +    switch (svt_ret) {

> +    case EB_ErrorInsufficientResources:

> +        err = AVERROR(ENOMEM);

> +        break;

> +

> +    case EB_ErrorUndefined:

> +    case EB_ErrorInvalidComponent:

> +    case EB_ErrorBadParameter:

> +        err = AVERROR(EINVAL);

> +        break;

> +

> +    case EB_ErrorDestroyThreadFailed:

> +    case EB_ErrorSemaphoreUnresponsive:

> +    case EB_ErrorDestroySemaphoreFailed:

> +    case EB_ErrorCreateMutexFailed:

> +    case EB_ErrorMutexUnresponsive:

> +    case EB_ErrorDestroyMutexFailed:

> +        err = AVERROR_EXTERNAL;

> +            break;

> +

> +    case EB_NoErrorEmptyQueue:

> +        err = AVERROR(EAGAIN);

> +

> +    case EB_ErrorNone:

> +        err = 0;

> +        break;

> +

> +    default:

> +        err = AVERROR_UNKNOWN;

> +    }

> +

> +    return err;

> +}

> +

> +static void free_buffer(SvtContext *svt_enc) {

> +    if (svt_enc->in_buf) {

> +        EB_H265_ENC_INPUT *in_data = (EB_H265_ENC_INPUT *)svt_enc->in_buf->pBuffer;

> +        av_freep(&in_data);

> +        av_freep(&svt_enc->in_buf);

> +    }

> +}

> +

> +static int alloc_buffer(EB_H265_ENC_CONFIGURATION *config, SvtContext 

> +*svt_enc) {

> +    const int    pack_mode_10bit   =

> +        (config->encoderBitDepth > 8) && (config->compressedTenBitFormat == 0) ? 1 : 0;

> +    const size_t luma_size_8bit    =

> +        config->sourceWidth * config->sourceHeight * (1 << pack_mode_10bit);

> +    const size_t luma_size_10bit   =

> +        (config->encoderBitDepth > 8 && pack_mode_10bit == 0) ? 

> +luma_size_8bit : 0;

> +

> +    EB_H265_ENC_INPUT *in_data;

> +

> +    svt_enc->raw_size = (luma_size_8bit + luma_size_10bit) * 3 / 2;


This isn't used anywhere.
[SUN, Jing] To be modified in v7.

> +    // allocate buffer for in and out

> +    svt_enc->in_buf           = av_mallocz(sizeof(*svt_enc->in_buf));

> +    if (!svt_enc->in_buf)

> +        goto failed;

> +

> +    in_data  = av_mallocz(sizeof(*in_data));

> +    if (!in_data)

> +        goto failed;

> +    svt_enc->in_buf->pBuffer  = (unsigned char *)in_data;

> +

> +    svt_enc->in_buf->nSize        = sizeof(*svt_enc->in_buf);

> +    svt_enc->in_buf->pAppPrivate  = NULL;


The EB_H265_ENC_INPUT structure also looks like it has the same lifetime as the encoder, so should be in the context structure?
[SUN, Jing] pBuffer is an element of the EB_BUFFERHEADERTYPE structure, defined as the pointer type. So I think it makes sense to use it this way.

> +

> +    return 0;

> +

> +failed:

> +    free_buffer(svt_enc);

> +    return AVERROR(ENOMEM);

> +}

> +

> +static int config_enc_params(EB_H265_ENC_CONFIGURATION *param,

> +                             AVCodecContext *avctx) {

> +    SvtContext *svt_enc = avctx->priv_data;

> +    int             ret;

> +    int        ten_bits = 0;

> +

> +    param->sourceWidth     = avctx->width;

> +    param->sourceHeight    = avctx->height;

> +

> +    if (avctx->pix_fmt == AV_PIX_FMT_YUV420P10LE) {

> +        av_log(avctx, AV_LOG_DEBUG , "Encoder 10 bits depth input\n");

> +        // Disable Compressed 10-bit format default

> +        //

> +        // SVT-HEVC support a compressed 10-bit format allowing the

> +        // software to achieve a higher speed and channel density levels.

> +        // The conversion between the 10-bit yuv420p10le and the compressed

> +        // 10-bit format is a lossless operation. But in FFmpeg, we usually

> +        // didn't use this format


If it improves speed beyond some minor constant factor for an immediate conversion in the encoder itself then should we in fact be doing this conversion here?
[SUN, Jing] The conversion is to be conducted on the source frames. And the op itself costs time, although the encoding after that speeds up because of it. Disable it for now and will consider how to enable it for the plugin and whether the degree of speed-up is worth the SW conversion cost in the future.

> +        param->compressedTenBitFormat = 0;

> +        ten_bits = 1;

> +    }

> +

> +    // Update param from options

> +    param->hierarchicalLevels     = svt_enc->hierarchical_level - 1;

> +    param->encMode                = svt_enc->enc_mode;

> +    param->profile                = svt_enc->profile;

> +    param->tier                   = svt_enc->tier;

> +    param->level                  = svt_enc->level;

> +    param->rateControlMode        = svt_enc->rc_mode;

> +    param->sceneChangeDetection   = svt_enc->scd;

> +    param->tune                   = svt_enc->tune;

> +    param->baseLayerSwitchMode    = svt_enc->base_layer_switch_mode;

> +    param->qp                     = svt_enc->qp;

> +    param->accessUnitDelimiter    = svt_enc->aud;

> +

> +    param->targetBitRate          = avctx->bit_rate;

> +    if (avctx->gop_size > 0)

> +        param->intraPeriodLength  = avctx->gop_size - 1;

> +

> +    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {

> +        param->frameRateNumerator     = avctx->framerate.num;

> +        param->frameRateDenominator   = avctx->framerate.den * avctx->ticks_per_frame;

> +    } else {

> +        param->frameRateNumerator     = avctx->time_base.den;

> +        param->frameRateDenominator   = avctx->time_base.num * avctx->ticks_per_frame;

> +    }

> +

> +    if (param->rateControlMode) {

> +        param->maxQpAllowed       = avctx->qmax;

> +        param->minQpAllowed       = avctx->qmin;

> +    }

> +

> +    param->intraRefreshType       =

> +        !!(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) + 1;

> +

> +    // is it repeat headers for MP4 or Annex-b

> +    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)

> +        param->codeVpsSpsPps          = 0;

> +    else

> +        param->codeVpsSpsPps          = 1;

> +

> +    param->codeEosNal             = 1;

> +

> +    if (svt_enc->hdr) {

> +        svt_enc->vui_info = 1;

> +        param->highDynamicRangeInput = svt_enc->hdr;


Doesn't this need more parameters to do anything?  The colourspace information at least, maybe the light level and display stuff as well (depending on which sort of HDR you are supporting).
[SUN, Jing] SVT-HEVC doesn't need those. Please refer to https://github.com/intel/SVT-HEVC/tree/master/Source/App for more details.

> +    }

> +

> +    if (svt_enc->vui_info)

> +        param->videoUsabilityInfo = svt_enc->vui_info;

> +

> +    if (svt_enc->la_depth != -1)

> +        param->lookAheadDistance  = svt_enc->la_depth;

> +

> +    if (ten_bits) {

> +        param->encoderBitDepth        = 10;

> +    }

> +

> +    ret = alloc_buffer(param, svt_enc);

> +

> +    return ret;

> +}

> +

> +static void read_in_data(EB_H265_ENC_CONFIGURATION *config,

> +                         const AVFrame *frame,

> +                         EB_BUFFERHEADERTYPE *headerPtr) {

> +    uint8_t is16bit = config->encoderBitDepth > 8;

> +    uint64_t luma_size =

> +        (uint64_t)config->sourceWidth * config->sourceHeight<< is16bit;

> +    EB_H265_ENC_INPUT *in_data = (EB_H265_ENC_INPUT 

> +*)headerPtr->pBuffer;

> +

> +    // support yuv420p and yuv420p010

> +    in_data->luma = frame->data[0];

> +    in_data->cb   = frame->data[1];

> +    in_data->cr   = frame->data[2];

> +

> +    // stride info

> +    in_data->yStride  = frame->linesize[0] >> is16bit;

> +    in_data->cbStride = frame->linesize[1] >> is16bit;

> +    in_data->crStride = frame->linesize[2] >> is16bit;

> +

> +    headerPtr->nFilledLen   += luma_size * 3/2u;


What does this value actually do?  Since the planes need not be in the same buffer, it doesn't seem likely to be meaningful.
[SUN, Jing] Those values are for the SVT-HEVC lib to get the passed down source frame information.

> +}

> +

> +static av_cold int eb_enc_init(AVCodecContext *avctx) {

> +    SvtContext   *svt_enc = avctx->priv_data;

> +    EB_ERRORTYPE svt_ret;

> +

> +    svt_enc->eos_flag = EOS_NOT_REACHED;

> +

> +    svt_ret = EbInitHandle(&svt_enc->svt_handle, svt_enc, &svt_enc->enc_params);

> +    if (svt_ret != EB_ErrorNone) {

> +        av_log(avctx, AV_LOG_ERROR, "Error init encoder handle\n");

> +        goto failed;

> +    }

> +

> +    svt_ret = config_enc_params(&svt_enc->enc_params, avctx);

> +    if (svt_ret != EB_ErrorNone) {

> +        av_log(avctx, AV_LOG_ERROR, "Error configure encoder parameters\n");

> +        goto failed_init_handle;

> +    }

> +

> +    svt_ret = EbH265EncSetParameter(svt_enc->svt_handle, &svt_enc->enc_params);

> +    if (svt_ret != EB_ErrorNone) {

> +        av_log(avctx, AV_LOG_ERROR, "Error setting encoder parameters\n");

> +        goto failed_init_handle;

> +    }

> +

> +    svt_ret = EbInitEncoder(svt_enc->svt_handle);

> +    if (svt_ret != EB_ErrorNone) {

> +        av_log(avctx, AV_LOG_ERROR, "Error init encoder\n");

> +        goto failed_init_handle;

> +    }

> +

> +    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {

> +        EB_BUFFERHEADERTYPE *headerPtr = NULL;


Please avoid using camelCase unless required by the API.
[SUN, Jing] To be modified in v7.

> +

> +        svt_ret = EbH265EncStreamHeader(svt_enc->svt_handle, &headerPtr);

> +        if (svt_ret != EB_ErrorNone) {

> +            av_log(avctx, AV_LOG_ERROR, "Error when build stream header.\n");

> +            goto failed_init_enc;

> +        }

> +

> +        avctx->extradata_size = headerPtr->nFilledLen;

> +        avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);

> +        if (!avctx->extradata) {

> +            av_log(avctx, AV_LOG_ERROR,

> +                   "Cannot allocate HEVC header of size %d.\n", avctx->extradata_size);

> +            svt_ret = EB_ErrorInsufficientResources;

> +            goto failed_init_enc;

> +        }

> +        memcpy(avctx->extradata, headerPtr->pBuffer, avctx->extradata_size);

> +    }

> +

> +    return 0;

> +

> +failed_init_enc:

> +    EbDeinitEncoder(svt_enc->svt_handle);

> +failed_init_handle:

> +    EbDeinitHandle(svt_enc->svt_handle);

> +failed:

> +    free_buffer(svt_enc);

> +    svt_enc->svt_handle = NULL;

> +    svt_enc = NULL;

> +    return error_mapping(svt_ret);

> +}

> +

> +static int eb_send_frame(AVCodecContext *avctx, const AVFrame *frame) 

> +{

> +    SvtContext           *svt_enc = avctx->priv_data;

> +    EB_BUFFERHEADERTYPE  *headerPtr = svt_enc->in_buf;

> +

> +    if (!frame) {

> +        EB_BUFFERHEADERTYPE headerPtrLast;

> +        headerPtrLast.nAllocLen   = 0;

> +        headerPtrLast.nFilledLen  = 0;

> +        headerPtrLast.nTickCount  = 0;

> +        headerPtrLast.pAppPrivate = NULL;

> +        headerPtrLast.pBuffer     = NULL;

> +        headerPtrLast.nFlags      = EB_BUFFERFLAG_EOS;

> +

> +        EbH265EncSendPicture(svt_enc->svt_handle, &headerPtrLast);


Can this call ever fail?  The API indicates it returns an error code.
[SUN, Jing] It always returns no-error. Please refer to https://github.com/intel/SVT-HEVC/blob/master/Source/Lib/Codec/EbEncHandle.c. I will ask the SVT HEVC lib developers to consider improving that and will modify the logic here after it's done.

> +        svt_enc->eos_flag = EOS_REACHED;

> +        av_log(avctx, AV_LOG_DEBUG, "Finish sending frames!!!\n");

> +        return 0;

> +    }

> +

> +    read_in_data(&svt_enc->enc_params, frame, headerPtr);

> +

> +    headerPtr->nFlags       = 0;

> +    headerPtr->pAppPrivate  = NULL;

> +    headerPtr->pts          = frame->pts;

> +    switch (frame->pict_type) {

> +    case AV_PICTURE_TYPE_I:

> +        headerPtr->sliceType = svt_enc->forced_idr > 0 ? EB_IDR_PICTURE : EB_I_PICTURE;

> +        break;

> +    case AV_PICTURE_TYPE_P:

> +        headerPtr->sliceType = EB_P_PICTURE;

> +        break;

> +    case AV_PICTURE_TYPE_B:

> +        headerPtr->sliceType = EB_B_PICTURE;

> +        break;

> +    default:

> +        headerPtr->sliceType = EB_INVALID_PICTURE;

> +        break;

> +    }

> +    EbH265EncSendPicture(svt_enc->svt_handle, headerPtr);


Same comment as above about failure.

With the current logic it looks like this API call is entirely synchronous, so in the case of lookahead the input will immediately be copied somewhere else inside the encoder so that the input frame can be discarded on return.  Is that correct?
[SUN, Jing] It's not synchronous, Calling EbH265EncSendPicture may or may not trigger the actual encoding, which depends on several internal/external indicators.

> +    return 0;

> +}

> +

> +static int eb_receive_packet(AVCodecContext *avctx, AVPacket *pkt) {

> +    SvtContext  *svt_enc = avctx->priv_data;

> +    EB_BUFFERHEADERTYPE   *headerPtr = NULL;

> +    EB_ERRORTYPE          svt_ret;

> +    int ret = 0;

> +

> +    if (EOS_TOTRIGGER == svt_enc->eos_flag) {

> +        pkt = NULL;

> +        return AVERROR_EOF;

> +    }

> +

> +    svt_ret = EbH265GetPacket(svt_enc->svt_handle, &headerPtr, svt_enc->eos_flag);

> +    if (svt_ret == EB_NoErrorEmptyQueue)

> +        return AVERROR(EAGAIN);

> +

> +    if ((ret = ff_alloc_packet2(avctx, pkt, headerPtr->nFilledLen, 0)) < 0) {

> +        av_log(avctx, AV_LOG_ERROR, "Failed to allocate output packet.\n");

> +        EbH265ReleaseOutBuffer(&headerPtr);

> +        return ret;

> +    }

> +

> +    memcpy(pkt->data, headerPtr->pBuffer, headerPtr->nFilledLen);

> +

> +    pkt->size = headerPtr->nFilledLen;

> +    pkt->pts  = headerPtr->pts;

> +    pkt->dts  = headerPtr->dts;

> +    if (headerPtr->sliceType == EB_IDR_PICTURE)> +        pkt->flags |= AV_PKT_FLAG_KEY;


Should CRA pictures also be marked as key frames?
[SUN, Jing] To be modified in v7.

> +    if (headerPtr->sliceType == EB_NON_REF_PICTURE)

> +        pkt->flags |= AV_PKT_FLAG_DISPOSABLE;

> +

> +    EbH265ReleaseOutBuffer(&headerPtr);

> +

> +    if (EB_BUFFERFLAG_EOS == headerPtr->nFlags)

> +        svt_enc->eos_flag = EOS_TOTRIGGER;

> +

> +    return 0;

> +}

> +

> +static av_cold int eb_enc_close(AVCodecContext *avctx) {

> +    SvtContext *svt_enc = avctx->priv_data;

> +

> +    if (svt_enc->svt_handle) {

> +        EbDeinitEncoder(svt_enc->svt_handle);

> +        EbDeinitHandle(svt_enc->svt_handle);

> +        svt_enc->svt_handle = NULL;

> +    }

> +

> +    if (svt_enc) {


If this test is false then you already invoked undefined behaviour by dereferencing the pointer above.
[SUN, Jing] To be modified in v7.

> +        free_buffer(svt_enc);

> +        svt_enc = NULL;

> +    }

> +

> +    return 0;

> +}

> +

> +#define OFFSET(x) offsetof(SvtContext, x) #define VE 

> +AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM static const 

> +AVOption options[] = {

> +    { "vui", "Enable vui info", OFFSET(vui_info),

> +      AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },

> +

> +    { "aud", "Include AUD", OFFSET(aud),

> +      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },

> +

> +    { "hielevel", "Hierarchical prediction levels setting", OFFSET(hierarchical_level),

> +      AV_OPT_TYPE_INT, { .i64 = 4 }, 1, 4, VE , "hielevel"},

> +        { "flat",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "hielevel" },

> +        { "2level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 },  INT_MIN, INT_MAX, VE, "hielevel" },

> +        { "3level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 3 },  INT_MIN, INT_MAX, VE, "hielevel" },

> +        { "4level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 4 },  

> + INT_MIN, INT_MAX, VE, "hielevel" },

> +

> +    { "la_depth", "Look ahead distance [0, 256]", OFFSET(la_depth),

> +      AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 256, VE },

> +

> +    { "preset", "Encoding preset [0, 12] (e,g, for subjective quality tuning mode and >=4k resolution), [0, 10] (for >= 1080p resolution), [0, 9] (for all resolution and modes)",

> +      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = 9 }, 0, 12, VE },

> +

> +    { "profile", "Profile setting, Main Still Picture Profile not supported", OFFSET(profile),

> +      AV_OPT_TYPE_INT, { .i64 = FF_PROFILE_HEVC_MAIN_10 }, 

> + FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10, VE, "profile"},

> +

> +#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \

> +    { .i64 = value }, 0, 0, VE, "profile"

> +        { PROFILE("main",   FF_PROFILE_HEVC_MAIN)    },

> +        { PROFILE("main10", FF_PROFILE_HEVC_MAIN_10) }, #undef 

> +PROFILE

> +

> +    { "tier", "Set tier (general_tier_flag)", OFFSET(tier),

> +      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE, "tier" },

> +        { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "tier" },

> +        { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, VE, 

> + "tier" },

> +

> +    { "level", "Set level (level_idc)", OFFSET(level),

> +      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 0xff, VE, "level" },

> +

> +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \

> +      { .i64 = value }, 0, 0, VE, "level"

> +        { LEVEL("1",   10) },

> +        { LEVEL("2",   20) },

> +        { LEVEL("2.1", 21) },

> +        { LEVEL("3",   30) },

> +        { LEVEL("3.1", 31) },

> +        { LEVEL("4",   40) },

> +        { LEVEL("4.1", 41) },

> +        { LEVEL("5",   50) },

> +        { LEVEL("5.1", 51) },

> +        { LEVEL("5.2", 52) },

> +        { LEVEL("6",   60) },

> +        { LEVEL("6.1", 61) },

> +        { LEVEL("6.2", 62) },


These numbers look suspicious.  Is it actually meant to match general_level_idc (which is level value * 30), or is it using the H.264 convention?  If the latter, please remove the reference to level_idc (because it's wrong) and add a comment explaining the values.
[SUN, Jing] Those definitions come from SVT-HEVC. 

> +#undef LEVEL

> +

> +    { "rc", "Bit rate control mode", OFFSET(rc_mode),

> +      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE , "rc"},

> +        { "cqp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "rc" },

> +        { "vbr", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, 

> + INT_MAX, VE, "rc" },


Is there an enum for this somewhere?
[SUN, Jing] In options_table.h, it is also "rc".
Options_table.h (ffmpeg\libavcodec):{"rc", "rate control", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_RC }, INT_MIN, INT_MAX, V|E, "debug"},

> +

> +    { "qp", "QP value for intra frames", OFFSET(qp),

> +      AV_OPT_TYPE_INT, { .i64 = 32 }, 0, 51, VE },


The use above suggests this applies to all frames in CQP mode, and maybe not at all in VBR mode.
Is the value mapped at all to make the real QP here?  That is, if I wanted to encode a 10-bit input at very high quality with QP -6, is that possible and what would I set this value to?
[SUN, Jing] The min value is 0, so I think -6 is not allowed.

> +

> +    { "sc_detection", "Scene change detection", OFFSET(scd),

> +      AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },

> +

> +    { "tune", "Quality tuning mode", OFFSET(tune), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, VE, "tune" },

> +        { "sq", "Visually optimized mode", 0,

> +          AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "tune" },

> +        { "oq",  "PSNR / SSIM optimized mode",  0,

> +          AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "tune" },

> +        { "vmaf", "VMAF optimized mode", 0,

> +          AV_OPT_TYPE_CONST, { .i64 = 2 },  INT_MIN, INT_MAX, VE, 

> + "tune" },


Is there an enum for this somewhere?
[SUN, Jing] The same as "rc".

> +

> +    { "bl_mode", "Random Access Prediction Structure type setting", OFFSET(base_layer_switch_mode),

> +      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },


The help text here doesn't say anything about what the option actually does.
For bool variables, you generally want the help text to roughly fit in a sentence like "when you set this option, it will <help text>" (like "include AUD" or "enable VUI info" above).
[SUN, Jing] That comes from the SVT HEVC user guide.
> +

> +    { "forced-idr", "If forcing keyframes, force them as IDR frames.", OFFSET(forced_idr),

> +      AV_OPT_TYPE_BOOL,   { .i64 = 0 }, -1, 1, VE },

> +

> +    { "hdr", "High dynamic range input", OFFSET(hdr),

> +      AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, VE },


As noted above, this option needs more metadata somewhere.
[SUN, Jing] That comes from the SVT HEVC user guide.

> +

> +    {NULL},

> +};

> +

> +static const AVClass class = {

> +    .class_name = "libsvt_hevc",

> +    .item_name  = av_default_item_name,

> +    .option     = options,

> +    .version    = LIBAVUTIL_VERSION_INT,

> +};

> +

> +static const AVCodecDefault eb_enc_defaults[] = {

> +    { "b",         "7M"    },

> +    { "flags",     "+cgop" },

> +    { "qmin",      "10"    },

> +    { "qmax",      "48"    },

> +    { "g",         "-2"    },


What does GOP size -2 mean?
[SUN, Jing] -2 is defined by SVT HEVC, meaning we expecting the SVT HEVC lib to choose a value.

> +    { NULL },

> +};

> +

> +AVCodec ff_libsvt_hevc_encoder = {

> +    .name           = "libsvt_hevc",

> +    .long_name      = NULL_IF_CONFIG_SMALL("SVT-HEVC(Scalable Video Technology for HEVC) encoder"),

> +    .priv_data_size = sizeof(SvtContext),

> +    .type           = AVMEDIA_TYPE_VIDEO,

> +    .id             = AV_CODEC_ID_HEVC,

> +    .init           = eb_enc_init,

> +    .send_frame     = eb_send_frame,

> +    .receive_packet = eb_receive_packet,

> +    .close          = eb_enc_close,

> +    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,

> +    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,

> +                                                    AV_PIX_FMT_YUV420P10,

> +                                                    AV_PIX_FMT_NONE },

> +    .priv_class     = &class,

> +    .defaults       = eb_enc_defaults,

> +    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,

> +    .wrapper_name   = "libsvt_hevc",

> +};

> 


- Mark
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Patch hide | download patch | download mbox

diff --git a/configure b/configure
index dcead3a..36bc8c1 100755
--- a/configure
+++ b/configure
@@ -264,6 +264,7 @@  External library support:
   --enable-libspeex        enable Speex de/encoding via libspeex [no]
   --enable-libsrt          enable Haivision SRT protocol via libsrt [no]
   --enable-libssh          enable SFTP protocol via libssh [no]
+  --enable-libsvthevc      enable HEVC encoding via svt [no]
   --enable-libtensorflow   enable TensorFlow as a DNN module backend
                            for DNN based filters like sr [no]
   --enable-libtesseract    enable Tesseract, needed for ocr filter [no]
@@ -1784,6 +1785,7 @@  EXTERNAL_LIBRARY_LIST="
     libspeex
     libsrt
     libssh
+    libsvthevc
     libtensorflow
     libtesseract
     libtheora
@@ -3173,6 +3175,7 @@  libshine_encoder_select="audio_frame_queue"
 libspeex_decoder_deps="libspeex"
 libspeex_encoder_deps="libspeex"
 libspeex_encoder_select="audio_frame_queue"
+libsvt_hevc_encoder_deps="libsvthevc"
 libtheora_encoder_deps="libtheora"
 libtwolame_encoder_deps="libtwolame"
 libvo_amrwbenc_encoder_deps="libvo_amrwbenc"
@@ -6209,6 +6212,7 @@  enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
 enabled libssh            && require_pkg_config libssh libssh libssh/sftp.h sftp_init
 enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init
 enabled libsrt            && require_pkg_config libsrt "srt >= 1.3.0" srt/srt.h srt_socket
+enabled libsvthevc        && require_pkg_config libsvthevc SvtHevcEnc svt-hevc/EbApi.h EbInitHandle
 enabled libtensorflow     && require libtensorflow tensorflow/c/c_api.h TF_Version -ltensorflow
 enabled libtesseract      && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate
 enabled libtheora         && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 15c43a8..c93e545 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -987,6 +987,7 @@  OBJS-$(CONFIG_LIBOPUS_ENCODER)            += libopusenc.o libopus.o     \
 OBJS-$(CONFIG_LIBSHINE_ENCODER)           += libshine.o
 OBJS-$(CONFIG_LIBSPEEX_DECODER)           += libspeexdec.o
 OBJS-$(CONFIG_LIBSPEEX_ENCODER)           += libspeexenc.o
+OBJS-$(CONFIG_LIBSVT_HEVC_ENCODER)        += libsvt_hevc.o
 OBJS-$(CONFIG_LIBTHEORA_ENCODER)          += libtheoraenc.o
 OBJS-$(CONFIG_LIBTWOLAME_ENCODER)         += libtwolame.o
 OBJS-$(CONFIG_LIBVO_AMRWBENC_ENCODER)     += libvo-amrwbenc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index b26aeca..e93f66f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -703,6 +703,7 @@  extern AVCodec ff_librsvg_decoder;
 extern AVCodec ff_libshine_encoder;
 extern AVCodec ff_libspeex_encoder;
 extern AVCodec ff_libspeex_decoder;
+extern AVCodec ff_libsvt_hevc_encoder;
 extern AVCodec ff_libtheora_encoder;
 extern AVCodec ff_libtwolame_encoder;
 extern AVCodec ff_libvo_amrwbenc_encoder;
diff --git a/libavcodec/libsvt_hevc.c b/libavcodec/libsvt_hevc.c
new file mode 100644
index 0000000..97bd204
--- /dev/null
+++ b/libavcodec/libsvt_hevc.c
@@ -0,0 +1,546 @@ 
+/*
+* Scalable Video Technology for HEVC encoder library plugin
+*
+* Copyright (c) 2018 Intel Corporation
+*
+* This file is part of FFmpeg.
+*
+* FFmpeg is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* FFmpeg is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this program; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "svt-hevc/EbErrorCodes.h"
+#include "svt-hevc/EbTime.h"
+#include "svt-hevc/EbApi.h"
+
+#include "libavutil/common.h"
+#include "libavutil/frame.h"
+#include "libavutil/opt.h"
+
+#include "internal.h"
+#include "avcodec.h"
+
+typedef enum eos_status {
+    EOS_NOT_REACHED = 0,
+    EOS_REACHED,
+    EOS_TOTRIGGER
+}EOS_STATUS;
+
+typedef struct SvtContext {
+    AVClass     *class;
+
+    EB_H265_ENC_CONFIGURATION  enc_params;
+    EB_COMPONENTTYPE           *svt_handle;
+
+    EB_BUFFERHEADERTYPE        *in_buf;
+    int                         raw_size;
+
+    EOS_STATUS eos_flag;
+
+    // User options.
+    int vui_info;
+    int hierarchical_level;
+    int la_depth;
+    int enc_mode;
+    int rc_mode;
+    int scd;
+    int tune;
+    int qp;
+    int hdr;
+
+    int forced_idr;
+
+    int aud;
+
+    int profile;
+    int tier;
+    int level;
+
+    int base_layer_switch_mode;
+} SvtContext;
+
+static int error_mapping(EB_ERRORTYPE svt_ret)
+{
+    int err;
+
+    switch (svt_ret) {
+    case EB_ErrorInsufficientResources:
+        err = AVERROR(ENOMEM);
+        break;
+
+    case EB_ErrorUndefined:
+    case EB_ErrorInvalidComponent:
+    case EB_ErrorBadParameter:
+        err = AVERROR(EINVAL);
+        break;
+
+    case EB_ErrorDestroyThreadFailed:
+    case EB_ErrorSemaphoreUnresponsive:
+    case EB_ErrorDestroySemaphoreFailed:
+    case EB_ErrorCreateMutexFailed:
+    case EB_ErrorMutexUnresponsive:
+    case EB_ErrorDestroyMutexFailed:
+        err = AVERROR_EXTERNAL;
+            break;
+
+    case EB_NoErrorEmptyQueue:
+        err = AVERROR(EAGAIN);
+
+    case EB_ErrorNone:
+        err = 0;
+        break;
+
+    default:
+        err = AVERROR_UNKNOWN;
+    }
+
+    return err;
+}
+
+static void free_buffer(SvtContext *svt_enc)
+{
+    if (svt_enc->in_buf) {
+        EB_H265_ENC_INPUT *in_data = (EB_H265_ENC_INPUT *)svt_enc->in_buf->pBuffer;
+        av_freep(&in_data);
+        av_freep(&svt_enc->in_buf);
+    }
+}
+
+static int alloc_buffer(EB_H265_ENC_CONFIGURATION *config, SvtContext *svt_enc)
+{
+    const int    pack_mode_10bit   =
+        (config->encoderBitDepth > 8) && (config->compressedTenBitFormat == 0) ? 1 : 0;
+    const size_t luma_size_8bit    =
+        config->sourceWidth * config->sourceHeight * (1 << pack_mode_10bit);
+    const size_t luma_size_10bit   =
+        (config->encoderBitDepth > 8 && pack_mode_10bit == 0) ? luma_size_8bit : 0;
+
+    EB_H265_ENC_INPUT *in_data;
+
+    svt_enc->raw_size = (luma_size_8bit + luma_size_10bit) * 3 / 2;
+
+    // allocate buffer for in and out
+    svt_enc->in_buf           = av_mallocz(sizeof(*svt_enc->in_buf));
+    if (!svt_enc->in_buf)
+        goto failed;
+
+    in_data  = av_mallocz(sizeof(*in_data));
+    if (!in_data)
+        goto failed;
+    svt_enc->in_buf->pBuffer  = (unsigned char *)in_data;
+
+    svt_enc->in_buf->nSize        = sizeof(*svt_enc->in_buf);
+    svt_enc->in_buf->pAppPrivate  = NULL;
+
+    return 0;
+
+failed:
+    free_buffer(svt_enc);
+    return AVERROR(ENOMEM);
+}
+
+static int config_enc_params(EB_H265_ENC_CONFIGURATION *param,
+                             AVCodecContext *avctx)
+{
+    SvtContext *svt_enc = avctx->priv_data;
+    int             ret;
+    int        ten_bits = 0;
+
+    param->sourceWidth     = avctx->width;
+    param->sourceHeight    = avctx->height;
+
+    if (avctx->pix_fmt == AV_PIX_FMT_YUV420P10LE) {
+        av_log(avctx, AV_LOG_DEBUG , "Encoder 10 bits depth input\n");
+        // Disable Compressed 10-bit format default
+        //
+        // SVT-HEVC support a compressed 10-bit format allowing the
+        // software to achieve a higher speed and channel density levels.
+        // The conversion between the 10-bit yuv420p10le and the compressed
+        // 10-bit format is a lossless operation. But in FFmpeg, we usually
+        // didn't use this format
+        param->compressedTenBitFormat = 0;
+        ten_bits = 1;
+    }
+
+    // Update param from options
+    param->hierarchicalLevels     = svt_enc->hierarchical_level - 1;
+    param->encMode                = svt_enc->enc_mode;
+    param->profile                = svt_enc->profile;
+    param->tier                   = svt_enc->tier;
+    param->level                  = svt_enc->level;
+    param->rateControlMode        = svt_enc->rc_mode;
+    param->sceneChangeDetection   = svt_enc->scd;
+    param->tune                   = svt_enc->tune;
+    param->baseLayerSwitchMode    = svt_enc->base_layer_switch_mode;
+    param->qp                     = svt_enc->qp;
+    param->accessUnitDelimiter    = svt_enc->aud;
+
+    param->targetBitRate          = avctx->bit_rate;
+    if (avctx->gop_size > 0)
+        param->intraPeriodLength  = avctx->gop_size - 1;
+
+    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+        param->frameRateNumerator     = avctx->framerate.num;
+        param->frameRateDenominator   = avctx->framerate.den * avctx->ticks_per_frame;
+    } else {
+        param->frameRateNumerator     = avctx->time_base.den;
+        param->frameRateDenominator   = avctx->time_base.num * avctx->ticks_per_frame;
+    }
+
+    if (param->rateControlMode) {
+        param->maxQpAllowed       = avctx->qmax;
+        param->minQpAllowed       = avctx->qmin;
+    }
+
+    param->intraRefreshType       =
+        !!(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) + 1;
+
+    // is it repeat headers for MP4 or Annex-b
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)
+        param->codeVpsSpsPps          = 0;
+    else
+        param->codeVpsSpsPps          = 1;
+
+    param->codeEosNal             = 1;
+
+    if (svt_enc->hdr) {
+        svt_enc->vui_info = 1;
+        param->highDynamicRangeInput = svt_enc->hdr;
+    }
+
+    if (svt_enc->vui_info)
+        param->videoUsabilityInfo = svt_enc->vui_info;
+
+    if (svt_enc->la_depth != -1)
+        param->lookAheadDistance  = svt_enc->la_depth;
+
+    if (ten_bits) {
+        param->encoderBitDepth        = 10;
+    }
+
+    ret = alloc_buffer(param, svt_enc);
+
+    return ret;
+}
+
+static void read_in_data(EB_H265_ENC_CONFIGURATION *config,
+                         const AVFrame *frame,
+                         EB_BUFFERHEADERTYPE *headerPtr)
+{
+    uint8_t is16bit = config->encoderBitDepth > 8;
+    uint64_t luma_size =
+        (uint64_t)config->sourceWidth * config->sourceHeight<< is16bit;
+    EB_H265_ENC_INPUT *in_data = (EB_H265_ENC_INPUT *)headerPtr->pBuffer;
+
+    // support yuv420p and yuv420p010
+    in_data->luma = frame->data[0];
+    in_data->cb   = frame->data[1];
+    in_data->cr   = frame->data[2];
+
+    // stride info
+    in_data->yStride  = frame->linesize[0] >> is16bit;
+    in_data->cbStride = frame->linesize[1] >> is16bit;
+    in_data->crStride = frame->linesize[2] >> is16bit;
+
+    headerPtr->nFilledLen   += luma_size * 3/2u;
+}
+
+static av_cold int eb_enc_init(AVCodecContext *avctx)
+{
+    SvtContext   *svt_enc = avctx->priv_data;
+    EB_ERRORTYPE svt_ret;
+
+    svt_enc->eos_flag = EOS_NOT_REACHED;
+
+    svt_ret = EbInitHandle(&svt_enc->svt_handle, svt_enc, &svt_enc->enc_params);
+    if (svt_ret != EB_ErrorNone) {
+        av_log(avctx, AV_LOG_ERROR, "Error init encoder handle\n");
+        goto failed;
+    }
+
+    svt_ret = config_enc_params(&svt_enc->enc_params, avctx);
+    if (svt_ret != EB_ErrorNone) {
+        av_log(avctx, AV_LOG_ERROR, "Error configure encoder parameters\n");
+        goto failed_init_handle;
+    }
+
+    svt_ret = EbH265EncSetParameter(svt_enc->svt_handle, &svt_enc->enc_params);
+    if (svt_ret != EB_ErrorNone) {
+        av_log(avctx, AV_LOG_ERROR, "Error setting encoder parameters\n");
+        goto failed_init_handle;
+    }
+
+    svt_ret = EbInitEncoder(svt_enc->svt_handle);
+    if (svt_ret != EB_ErrorNone) {
+        av_log(avctx, AV_LOG_ERROR, "Error init encoder\n");
+        goto failed_init_handle;
+    }
+
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
+        EB_BUFFERHEADERTYPE *headerPtr = NULL;
+
+        svt_ret = EbH265EncStreamHeader(svt_enc->svt_handle, &headerPtr);
+        if (svt_ret != EB_ErrorNone) {
+            av_log(avctx, AV_LOG_ERROR, "Error when build stream header.\n");
+            goto failed_init_enc;
+        }
+
+        avctx->extradata_size = headerPtr->nFilledLen;
+        avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!avctx->extradata) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Cannot allocate HEVC header of size %d.\n", avctx->extradata_size);
+            svt_ret = EB_ErrorInsufficientResources;
+            goto failed_init_enc;
+        }
+        memcpy(avctx->extradata, headerPtr->pBuffer, avctx->extradata_size);
+    }
+
+    return 0;
+
+failed_init_enc:
+    EbDeinitEncoder(svt_enc->svt_handle);
+failed_init_handle:
+    EbDeinitHandle(svt_enc->svt_handle);
+failed:
+    free_buffer(svt_enc);
+    svt_enc->svt_handle = NULL;
+    svt_enc = NULL;
+    return error_mapping(svt_ret);
+}
+
+static int eb_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+{
+    SvtContext           *svt_enc = avctx->priv_data;
+    EB_BUFFERHEADERTYPE  *headerPtr = svt_enc->in_buf;
+
+    if (!frame) {
+        EB_BUFFERHEADERTYPE headerPtrLast;
+        headerPtrLast.nAllocLen   = 0;
+        headerPtrLast.nFilledLen  = 0;
+        headerPtrLast.nTickCount  = 0;
+        headerPtrLast.pAppPrivate = NULL;
+        headerPtrLast.pBuffer     = NULL;
+        headerPtrLast.nFlags      = EB_BUFFERFLAG_EOS;
+
+        EbH265EncSendPicture(svt_enc->svt_handle, &headerPtrLast);
+        svt_enc->eos_flag = EOS_REACHED;
+        av_log(avctx, AV_LOG_DEBUG, "Finish sending frames!!!\n");
+        return 0;
+    }
+
+    read_in_data(&svt_enc->enc_params, frame, headerPtr);
+
+    headerPtr->nFlags       = 0;
+    headerPtr->pAppPrivate  = NULL;
+    headerPtr->pts          = frame->pts;
+    switch (frame->pict_type) {
+    case AV_PICTURE_TYPE_I:
+        headerPtr->sliceType = svt_enc->forced_idr > 0 ? EB_IDR_PICTURE : EB_I_PICTURE;
+        break;
+    case AV_PICTURE_TYPE_P:
+        headerPtr->sliceType = EB_P_PICTURE;
+        break;
+    case AV_PICTURE_TYPE_B:
+        headerPtr->sliceType = EB_B_PICTURE;
+        break;
+    default:
+        headerPtr->sliceType = EB_INVALID_PICTURE;
+        break;
+    }
+    EbH265EncSendPicture(svt_enc->svt_handle, headerPtr);
+
+    return 0;
+}
+
+static int eb_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    SvtContext  *svt_enc = avctx->priv_data;
+    EB_BUFFERHEADERTYPE   *headerPtr = NULL;
+    EB_ERRORTYPE          svt_ret;
+    int ret = 0;
+
+    if (EOS_TOTRIGGER == svt_enc->eos_flag) {
+        pkt = NULL;
+        return AVERROR_EOF;
+    }
+
+    svt_ret = EbH265GetPacket(svt_enc->svt_handle, &headerPtr, svt_enc->eos_flag);
+    if (svt_ret == EB_NoErrorEmptyQueue)
+        return AVERROR(EAGAIN);
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, headerPtr->nFilledLen, 0)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to allocate output packet.\n");
+        EbH265ReleaseOutBuffer(&headerPtr);
+        return ret;
+    }
+
+    memcpy(pkt->data, headerPtr->pBuffer, headerPtr->nFilledLen);
+
+    pkt->size = headerPtr->nFilledLen;
+    pkt->pts  = headerPtr->pts;
+    pkt->dts  = headerPtr->dts;
+    if (headerPtr->sliceType == EB_IDR_PICTURE)
+        pkt->flags |= AV_PKT_FLAG_KEY;
+    if (headerPtr->sliceType == EB_NON_REF_PICTURE)
+        pkt->flags |= AV_PKT_FLAG_DISPOSABLE;
+
+    EbH265ReleaseOutBuffer(&headerPtr);
+
+    if (EB_BUFFERFLAG_EOS == headerPtr->nFlags)
+        svt_enc->eos_flag = EOS_TOTRIGGER;
+
+    return 0;
+}
+
+static av_cold int eb_enc_close(AVCodecContext *avctx)
+{
+    SvtContext *svt_enc = avctx->priv_data;
+
+    if (svt_enc->svt_handle) {
+        EbDeinitEncoder(svt_enc->svt_handle);
+        EbDeinitHandle(svt_enc->svt_handle);
+        svt_enc->svt_handle = NULL;
+    }
+
+    if (svt_enc) {
+        free_buffer(svt_enc);
+        svt_enc = NULL;
+    }
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(SvtContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "vui", "Enable vui info", OFFSET(vui_info),
+      AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
+
+    { "aud", "Include AUD", OFFSET(aud),
+      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+
+    { "hielevel", "Hierarchical prediction levels setting", OFFSET(hierarchical_level),
+      AV_OPT_TYPE_INT, { .i64 = 4 }, 1, 4, VE , "hielevel"},
+        { "flat",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "hielevel" },
+        { "2level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 },  INT_MIN, INT_MAX, VE, "hielevel" },
+        { "3level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 3 },  INT_MIN, INT_MAX, VE, "hielevel" },
+        { "4level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 4 },  INT_MIN, INT_MAX, VE, "hielevel" },
+
+    { "la_depth", "Look ahead distance [0, 256]", OFFSET(la_depth),
+      AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 256, VE },
+
+    { "preset", "Encoding preset [0, 12] (e,g, for subjective quality tuning mode and >=4k resolution), [0, 10] (for >= 1080p resolution), [0, 9] (for all resolution and modes)",
+      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = 9 }, 0, 12, VE },
+
+    { "profile", "Profile setting, Main Still Picture Profile not supported", OFFSET(profile),
+      AV_OPT_TYPE_INT, { .i64 = FF_PROFILE_HEVC_MAIN_10 }, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10, VE, "profile"},
+
+#define PROFILE(name, value)  name, NULL, 0, AV_OPT_TYPE_CONST, \
+    { .i64 = value }, 0, 0, VE, "profile"
+        { PROFILE("main",   FF_PROFILE_HEVC_MAIN)    },
+        { PROFILE("main10", FF_PROFILE_HEVC_MAIN_10) },
+#undef PROFILE
+
+    { "tier", "Set tier (general_tier_flag)", OFFSET(tier),
+      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE, "tier" },
+        { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "tier" },
+        { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, VE, "tier" },
+
+    { "level", "Set level (level_idc)", OFFSET(level),
+      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 0xff, VE, "level" },
+
+#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \
+      { .i64 = value }, 0, 0, VE, "level"
+        { LEVEL("1",   10) },
+        { LEVEL("2",   20) },
+        { LEVEL("2.1", 21) },
+        { LEVEL("3",   30) },
+        { LEVEL("3.1", 31) },
+        { LEVEL("4",   40) },
+        { LEVEL("4.1", 41) },
+        { LEVEL("5",   50) },
+        { LEVEL("5.1", 51) },
+        { LEVEL("5.2", 52) },
+        { LEVEL("6",   60) },
+        { LEVEL("6.1", 61) },
+        { LEVEL("6.2", 62) },
+#undef LEVEL
+
+    { "rc", "Bit rate control mode", OFFSET(rc_mode),
+      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE , "rc"},
+        { "cqp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "rc" },
+        { "vbr", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "rc" },
+
+    { "qp", "QP value for intra frames", OFFSET(qp),
+      AV_OPT_TYPE_INT, { .i64 = 32 }, 0, 51, VE },
+
+    { "sc_detection", "Scene change detection", OFFSET(scd),
+      AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
+
+    { "tune", "Quality tuning mode", OFFSET(tune), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, VE, "tune" },
+        { "sq", "Visually optimized mode", 0,
+          AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "tune" },
+        { "oq",  "PSNR / SSIM optimized mode",  0,
+          AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "tune" },
+        { "vmaf", "VMAF optimized mode", 0,
+          AV_OPT_TYPE_CONST, { .i64 = 2 },  INT_MIN, INT_MAX, VE, "tune" },
+
+    { "bl_mode", "Random Access Prediction Structure type setting", OFFSET(base_layer_switch_mode),
+      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+
+    { "forced-idr", "If forcing keyframes, force them as IDR frames.", OFFSET(forced_idr),
+      AV_OPT_TYPE_BOOL,   { .i64 = 0 }, -1, 1, VE },
+
+    { "hdr", "High dynamic range input", OFFSET(hdr),
+      AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, VE },
+
+    {NULL},
+};
+
+static const AVClass class = {
+    .class_name = "libsvt_hevc",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault eb_enc_defaults[] = {
+    { "b",         "7M"    },
+    { "flags",     "+cgop" },
+    { "qmin",      "10"    },
+    { "qmax",      "48"    },
+    { "g",         "-2"    },
+    { NULL },
+};
+
+AVCodec ff_libsvt_hevc_encoder = {
+    .name           = "libsvt_hevc",
+    .long_name      = NULL_IF_CONFIG_SMALL("SVT-HEVC(Scalable Video Technology for HEVC) encoder"),
+    .priv_data_size = sizeof(SvtContext),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HEVC,
+    .init           = eb_enc_init,
+    .send_frame     = eb_send_frame,
+    .receive_packet = eb_receive_packet,
+    .close          = eb_enc_close,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,
+                                                    AV_PIX_FMT_YUV420P10,
+                                                    AV_PIX_FMT_NONE },
+    .priv_class     = &class,
+    .defaults       = eb_enc_defaults,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .wrapper_name   = "libsvt_hevc",
+};