Message ID | 20240826175132.21894-2-yigithanyigitdevel@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v2] avfilter/vf_libvmaf: Add metadata propagation support | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
Ping. Thanks > On 26 Aug 2024, at 20:51, Yigithan Yigit <yigithanyigitdevel@gmail.com> wrote: > > --- > libavfilter/vf_libvmaf.c | 328 ++++++++++++++++++++++++++++++++++++++- > 1 file changed, 326 insertions(+), 2 deletions(-) > > diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c > index f655092b20..e6707aff53 100644 > --- a/libavfilter/vf_libvmaf.c > +++ b/libavfilter/vf_libvmaf.c > @@ -27,8 +27,11 @@ > #include "config_components.h" > > #include <libvmaf.h> > +#include <libvmaf/version.h> > > #include "libavutil/avstring.h" > +#include "libavutil/dict.h" > +#include "libavutil/frame.h" > #include "libavutil/mem.h" > #include "libavutil/opt.h" > #include "libavutil/pixdesc.h" > @@ -46,6 +49,31 @@ > #include "libavutil/hwcontext_cuda_internal.h" > #endif > > +#define VMAF_VERSION_INT_VER(major, minor, patch) \ > + ((major) * 10000 + (minor) * 100 + (patch)) > + > +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR, VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0) > +#define CONFIG_LIBVMAF_METADATA_ENABLED 1 > +#else > +#define CONFIG_LIBVMAF_METADATA_ENABLED 0 > +#endif > + > +#if CONFIG_LIBVMAF_METADATA_ENABLED > +#include <stdatomic.h> > + > +typedef struct FrameList { > + AVFrame *frame; > + unsigned frame_number; > + unsigned propagated_handlers_cnt; > + struct FrameList *next; > +} FrameList; > + > +typedef struct CallbackStruct { > + struct LIBVMAFContext *s; > + FrameList *frame_list; > +} CallbackStruct; > +#endif > + > typedef struct LIBVMAFContext { > const AVClass *class; > FFFrameSync fs; > @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext { > int n_subsample; > char *model_cfg; > char *feature_cfg; > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + char *metadata_feature_cfg; > + struct { > + VmafMetadataConfiguration *metadata_cfgs; > + unsigned metadata_cfg_cnt; > + } metadata_cfg_list; > + CallbackStruct *cb; > + atomic_uint outlink_eof; > + atomic_uint eof_frame; > +#endif > VmafContext *vmaf; > VmafModel **model; 
> + int flushed; > unsigned model_cnt; > unsigned frame_cnt; > unsigned bpc; > @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] = { > {"n_subsample", "Set interval for frame subsampling used when computing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS}, > {"model", "Set the model to be used for computing vmaf.", OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str="version=vmaf_v0.6.1"}, 0, 1, FLAGS}, > {"feature", "Set the feature to be used for computing vmaf.", OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS}, > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + {"metadata_handler", "Set the feature to be propagated as metadata.", OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str="name=vmaf"}, 0, 1, FLAGS}, > +#endif > { NULL } > }; > > @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixelFormat av_pix_fmt) > } > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned frame_number) > +{ > + FrameList *new_frame = av_malloc(sizeof(FrameList)); > + if (!new_frame) > + return AVERROR(ENOMEM); > + > + new_frame->frame = frame; > + new_frame->frame_number = frame_number; > + new_frame->propagated_handlers_cnt = 0; > + new_frame->next = NULL; > + > + if (*head == NULL) { > + *head = new_frame; > + } else { > + FrameList *current = *head; > + while (current->next != NULL) { > + current = current->next; > + } > + current->next = new_frame; > + } > + > + return 0; > +} > + > +static int remove_from_frame_list(FrameList **frame_list, unsigned frame_number) > +{ > + FrameList *cur = *frame_list; > + FrameList *prev = NULL; > + > + while (cur) { > + if (cur->frame_number == frame_number) { > + if (prev) > + prev->next = cur->next; > + else > + *frame_list = cur->next; > + av_free(cur); > + return 0; > + } > + prev = cur; > + cur = cur->next; > + } > + > + return AVERROR(EINVAL); > +} > + > +static int free_frame_list(FrameList **frame_list) > +{ > 
+ FrameList *cur = *frame_list; > + while (cur) { > + FrameList *next = cur->next; > + av_frame_free(&cur->frame); > + av_free(cur); > + cur = next; > + } > + *frame_list = NULL; > + return 0; > +} > + > +static FrameList* get_frame_from_frame_list(FrameList *frame_list, > + unsigned frame_number) > +{ > + FrameList *cur = frame_list; > + while (cur) { > + if (cur->frame_number == frame_number) > + return cur; > + cur = cur->next; > + } > + return NULL; > +} > + > +static void set_meta(void *data, VmafMetadata *metadata) > +{ > + int err = 0; > + FrameList *current_frame = NULL; > + CallbackStruct *cb = data; > + char value[128], key[128]; > + snprintf(value, sizeof(value), "%0.2f", metadata->score); > + snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata->picture_index); > + > + current_frame = get_frame_from_frame_list(cb->frame_list, metadata->picture_index); > + if (!current_frame) { > + av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\n", > + metadata->picture_index); > + return; > + } > + > + err = av_dict_set(&current_frame->frame->metadata, key, value, 0); > + if (err < 0) > + av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key); > + > + current_frame->propagated_handlers_cnt++; > + > + if (current_frame->propagated_handlers_cnt == cb->s->metadata_cfg_list.metadata_cfg_cnt) { > + FrameList *cur = cb->frame_list; > + // This code block allows to send frames monotonically > + while(cur && cur->frame_number <= metadata->picture_index) { > + if (cur->propagated_handlers_cnt == cb->s->metadata_cfg_list.metadata_cfg_cnt) { > + FrameList *next; > + // Check outlink is closed > + if (!cb->s->outlink_eof) { > + av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature: %d, score: %f\n", cur->frame_number, metadata->score); > + cb->s->eof_frame = cur->frame_number; > + if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur->frame)) > + return; > + } > + next = cur->next; > + remove_from_frame_list(&cb->frame_list, cur->frame_number); > +
cur = next; > + } > + else > + break; > + } > + } > +} > +#endif > + > static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bpc) > { > const int bytes_per_value = bpc > 8 ? 2 : 1; > @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs) > return AVERROR(ENOMEM); > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + err = add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt); > + if (err) { > + av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n"); > + return AVERROR(ENOMEM); > + } > +#endif > + > err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++); > if (err) { > av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n"); > return AVERROR(EINVAL); > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + if (s->metadata_cfg_list.metadata_cfg_cnt) > + return 0; > + else > + return ff_filter_frame(ctx->outputs[0], dist); > +#else > return ff_filter_frame(ctx->outputs[0], dist); > +#endif > } > > static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt) > @@ -408,6 +582,83 @@ exit: > return err; > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > +static int parse_metadata_handlers(AVFilterContext *ctx) > +{ > + LIBVMAFContext *s = ctx->priv; > + AVDictionary **dict; > + unsigned dict_cnt; > + int err = 0; > + > + if (!s->metadata_feature_cfg) > + return 0; > + > + dict_cnt = 0; > + dict = delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt); > + if (!dict) { > + av_log(ctx, AV_LOG_ERROR, > + "could not parse metadata feature config: %s\n", > + s->metadata_feature_cfg); > + return AVERROR(EINVAL); > + } > + > + for (unsigned i = 0; i < dict_cnt; i++) { > + VmafMetadataConfiguration *metadata_cfg = av_calloc(1, sizeof(*metadata_cfg)); > + const AVDictionaryEntry *e = NULL; > + char *feature_name = NULL; > + > + while (e = av_dict_iterate(dict[i], e)) { > + if (!strcmp(e->key, "name")) { > + metadata_cfg->feature_name = av_strdup(e->value); > + continue; > + } > + } > + > + metadata_cfg->data = s->cb; > + 
metadata_cfg->callback = &set_meta; > + > + err = vmaf_register_metadata_handler(s->vmaf, *metadata_cfg); > + if (err) { > + av_log(ctx, AV_LOG_ERROR, > + "problem during vmaf_register_metadata_handler: %s\n", > + feature_name); > + goto exit; > + } > + > + s->metadata_cfg_list.metadata_cfgs = av_realloc(s->metadata_cfg_list.metadata_cfgs, > + (s->metadata_cfg_list.metadata_cfg_cnt + 1) * > + sizeof(*s->metadata_cfg_list.metadata_cfgs)); > + if (!s->metadata_cfg_list.metadata_cfgs) { > + err = AVERROR(ENOMEM); > + goto exit; > + } > + > + s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata_cfg_cnt++] = *metadata_cfg; > + } > + > +exit: > + for (unsigned i = 0; i < dict_cnt; i++) { > + if (dict[i]) > + av_dict_free(&dict[i]); > + } > + av_free(dict); > + return err; > +} > + > +static int init_metadata(AVFilterContext *ctx) > +{ > + LIBVMAFContext *s = ctx->priv; > + > + s->cb = av_calloc(1, sizeof(CallbackStruct)); > + if (!s->cb) > + return AVERROR(ENOMEM); > + > + s->cb->s = s; > + > + return 0; > +} > +#endif > + > static enum VmafLogLevel log_level_map(int log_level) > { > switch (log_level) { > @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx) > if (err) > return AVERROR(EINVAL); > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + err = init_metadata(ctx); > + if (err) > + return err; > + > + err = parse_metadata_handlers(ctx); > + if (err) > + return err; > +#endif > + > err = parse_models(ctx); > if (err) > return err; > @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink) > static int activate(AVFilterContext *ctx) > { > LIBVMAFContext *s = ctx->priv; > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + // There are 2 cases for metadata propagation: > + // 1. Where the case that outlink closes > + // 2. Where inlink closes > + // Case 1: > + // In this case we need check outlink somehow for the status in every iteration. > + // If outlink is not wanting frame anymore, we need to proceed with uninit with setting inlink. 
> + // But nature of multithreading settting eof inside the activate call can make sync issues and > + // can lead to extra propagated frames. Atomic variables are used to avoid this. > + // Case 2: > + // This case relatively easy to handle. Because of calculation of vmaf score takes time > + // So `do_vmaf` buffers many of frames before sending to outlink that causes > + // premature close of outlink. > + // Checking inlink status is enough and if inlink == eof flushing vmaf is enough for this. > + int64_t pts; > + int status, ret = 0; > + > + if (ff_outlink_get_status(ctx->outputs[0])) > + s->outlink_eof = 1; > + > + if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) && > + ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) { > + if (!s->flushed) { > + ret = vmaf_read_pictures(s->vmaf, NULL, NULL, 0); > + if (ret) > + av_log(ctx, AV_LOG_ERROR, > + "problem flushing libvmaf context.\n"); > + else > + s->flushed = 1; > + } > + } > +#endif > return ff_framesync_activate(&s->fs); > } > > @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx) > LIBVMAFContext *s = ctx->priv; > int err = 0; > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + if (!s->outlink_eof) > + s->outlink_eof = 1; > +#endif > + > ff_framesync_uninit(&s->fs); > > if (!s->frame_cnt) > goto clean_up; > > - err = vmaf_read_pictures(s->vmaf, NULL, NULL, 0); > + if (!s->flushed) { > + err = vmaf_read_pictures(s->vmaf, NULL, NULL, 0); > + if (err) { > + av_log(ctx, AV_LOG_ERROR, > + "problem flushing libvmaf context.\n"); > + } else > + s->flushed = 1; > + } > + > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + if (s->metadata_cfg_list.metadata_cfgs) { > + for (unsigned i = 0; i < s->metadata_cfg_list.metadata_cfg_cnt; i++) { > + av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name); > + } > + av_free(s->metadata_cfg_list.metadata_cfgs); > + } > + > + err = free_frame_list(&s->cb->frame_list); > if (err) { > av_log(ctx, AV_LOG_ERROR, > - "problem flushing libvmaf 
context.\n"); > + "problem freeing frame list.\n"); > } > +#endif > > for (unsigned i = 0; i < s->model_cnt; i++) { > double vmaf_score; > + > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + err = vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(s->pool), > + &vmaf_score, 0, s->eof_frame); > + av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_frame, s->frame_cnt - 1); > +#else > err = vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(s->pool), > &vmaf_score, 0, s->frame_cnt - 1); > +#endif > + > if (err) { > av_log(ctx, AV_LOG_ERROR, > "problem getting pooled vmaf score.\n"); > -- > 2.45.2 >
Hi, On Mon, Aug 26, 2024 at 10:51 AM Yigithan Yigit <yigithanyigitdevel@gmail.com> wrote: > > --- > libavfilter/vf_libvmaf.c | 328 ++++++++++++++++++++++++++++++++++++++- > 1 file changed, 326 insertions(+), 2 deletions(-) > > diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c > index f655092b20..e6707aff53 100644 > --- a/libavfilter/vf_libvmaf.c > +++ b/libavfilter/vf_libvmaf.c > @@ -27,8 +27,11 @@ > #include "config_components.h" > > #include <libvmaf.h> > +#include <libvmaf/version.h> > > #include "libavutil/avstring.h" > +#include "libavutil/dict.h" > +#include "libavutil/frame.h" > #include "libavutil/mem.h" > #include "libavutil/opt.h" > #include "libavutil/pixdesc.h" > @@ -46,6 +49,31 @@ > #include "libavutil/hwcontext_cuda_internal.h" > #endif > > +#define VMAF_VERSION_INT_VER(major, minor, patch) \ > + ((major) * 10000 + (minor) * 100 + (patch)) > + > +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR, VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0) > +#define CONFIG_LIBVMAF_METADATA_ENABLED 1 > +#else > +#define CONFIG_LIBVMAF_METADATA_ENABLED 0 > +#endif You should be able to check pkg-config and set this CONFIG_LIBVMAF_METADATA_ENABLED define from the configure script.
> + > +#if CONFIG_LIBVMAF_METADATA_ENABLED > +#include <stdatomic.h> > + > +typedef struct FrameList { > + AVFrame *frame; > + unsigned frame_number; > + unsigned propagated_handlers_cnt; > + struct FrameList *next; > +} FrameList; > + > +typedef struct CallbackStruct { > + struct LIBVMAFContext *s; > + FrameList *frame_list; > +} CallbackStruct; > +#endif > + > typedef struct LIBVMAFContext { > const AVClass *class; > FFFrameSync fs; > @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext { > int n_subsample; > char *model_cfg; > char *feature_cfg; > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + char *metadata_feature_cfg; > + struct { > + VmafMetadataConfiguration *metadata_cfgs; > + unsigned metadata_cfg_cnt; > + } metadata_cfg_list; > + CallbackStruct *cb; > + atomic_uint outlink_eof; > + atomic_uint eof_frame; > +#endif > VmafContext *vmaf; > VmafModel **model; > + int flushed; > unsigned model_cnt; > unsigned frame_cnt; > unsigned bpc; > @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] =3D { > {"n_subsample", "Set interval for frame subsampling used when comput= ing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=3D1}, 1, UINT_M= AX, FLAGS}, > {"model", "Set the model to be used for computing vmaf.", = OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str=3D"version=3Dv= maf_v0.6.1"}, 0, 1, FLAGS}, > {"feature", "Set the feature to be used for computing vmaf.", = OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=3DNULL}, 0, = 1, FLAGS}, > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + {"metadata_handler", "Set the feature to be propagated as metadata.= ", OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str=3D"= name=3Dvmaf"}, 0, 1, FLAGS}, Would be better to make this option a bool. When true, propagate all registered features and models. You can read the names during init, they should be available inside `parse_models()` and `parse_features()`. 
> +#endif > { NULL } > }; > > @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixe= lFormat av_pix_fmt) > } > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned = frame_number) > +{ > + FrameList *new_frame =3D av_malloc(sizeof(FrameList)); > + if (!new_frame) > + return AVERROR(ENOMEM); > + > + new_frame->frame =3D frame; > + new_frame->frame_number =3D frame_number; > + new_frame->propagated_handlers_cnt =3D 0; > + new_frame->next =3D NULL; > + > + if (*head =3D=3D NULL) { > + *head =3D new_frame; > + } else { > + FrameList *current =3D *head; > + while (current->next !=3D NULL) { > + current =3D current->next; > + } > + current->next =3D new_frame; > + } > + > + return 0; > +} > + > +static int remove_from_frame_list(FrameList **frame_list, unsigned frame= _number) > +{ > + FrameList *cur =3D *frame_list; > + FrameList *prev =3D NULL; > + > + while (cur) { > + if (cur->frame_number =3D=3D frame_number) { > + if (prev) > + prev->next =3D cur->next; > + else > + *frame_list =3D cur->next; > + av_free(cur); > + return 0; > + } > + prev =3D cur; > + cur =3D cur->next; > + } > + > + return AVERROR(EINVAL); > +} > + > +static int free_frame_list(FrameList **frame_list) > +{ > + FrameList *cur =3D *frame_list; > + while (cur) { > + FrameList *next =3D cur->next; > + av_frame_free(&cur->frame); > + av_free(cur); > + cur =3D next; > + } > + *frame_list =3D NULL; > + return 0; > +} > + > +static FrameList* get_frame_from_frame_list(FrameList *frame_list, > + unsigned frame_number) > +{ > + FrameList *cur =3D frame_list; > + while (cur) { > + if (cur->frame_number =3D=3D frame_number) > + return cur; > + cur =3D cur->next; > + } > + return NULL; > +} > + Would be great if we didn't need to invent a data structure here. I guess av_fifo is no good here because metadata callbacks are not guaranteed to come in order? 
> +static void set_meta(void *data, VmafMetadata *metadata) > +{ > + int err =3D 0; > + FrameList *current_frame =3D NULL; > + CallbackStruct *cb =3D data; > + char value[128], key[128]; > + snprintf(value, sizeof(value), "%0.2f", metadata->score); > + snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata= ->picture_index); > + > + current_frame =3D get_frame_from_frame_list(cb->frame_list, metadata= ->picture_index); > + if (!current_frame) { > + av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\= n", > + metadata->picture_index); > + return; > + } > + > + err =3D av_dict_set(¤t_frame->frame->metadata, key, value, 0); > + if (err < 0) > + av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key); > + > + current_frame->propagated_handlers_cnt++; > + > + if (current_frame->propagated_handlers_cnt =3D=3D cb->s->metadata_cf= g_list.metadata_cfg_cnt) { > + FrameList *cur =3D cb->frame_list; > + // This code block allows to send frames monotonically > + while(cur && cur->frame_number <=3D metadata->picture_index) { > + if (cur->propagated_handlers_cnt =3D=3D cb->s->metadata_cfg_= list.metadata_cfg_cnt) { > + FrameList *next; > + // Check outlink is closed > + if (!cb->s->outlink_eof) { > + av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature= : %d, score: %f\n", cur->frame_number, metadata->score); > + cb->s->eof_frame =3D cur->frame_number; > + if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur= ->frame)) > + return; > + } > + next =3D cur->next; > + remove_from_frame_list(&cb->frame_list, cur->frame_numbe= r); > + cur =3D next; > + } > + else > + break; > + } > + } > +} > +#endif > + > static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bp= c) > { > const int bytes_per_value =3D bpc > 8 ? 
2 : 1; > @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs) > return AVERROR(ENOMEM); > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + err =3D add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt); > + if (err) { > + av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n"); > + return AVERROR(ENOMEM); > + } > +#endif > + > err =3D vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cn= t++); > if (err) { > av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n"); > return AVERROR(EINVAL); > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + if (s->metadata_cfg_list.metadata_cfg_cnt) > + return 0; > + else > + return ff_filter_frame(ctx->outputs[0], dist); > +#else > return ff_filter_frame(ctx->outputs[0], dist); > +#endif > } > > static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt) > @@ -408,6 +582,83 @@ exit: > return err; > } > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > +static int parse_metadata_handlers(AVFilterContext *ctx) > +{ > + LIBVMAFContext *s =3D ctx->priv; > + AVDictionary **dict; > + unsigned dict_cnt; > + int err =3D 0; > + > + if (!s->metadata_feature_cfg) > + return 0; > + > + dict_cnt =3D 0; > + dict =3D delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt); > + if (!dict) { > + av_log(ctx, AV_LOG_ERROR, > + "could not parse metadata feature config: %s\n", > + s->metadata_feature_cfg); > + return AVERROR(EINVAL); > + } > + > + for (unsigned i =3D 0; i < dict_cnt; i++) { > + VmafMetadataConfiguration *metadata_cfg =3D av_calloc(1, sizeof(= *metadata_cfg)); > + const AVDictionaryEntry *e =3D NULL; > + char *feature_name =3D NULL; > + > + while (e =3D av_dict_iterate(dict[i], e)) { > + if (!strcmp(e->key, "name")) { > + metadata_cfg->feature_name =3D av_strdup(e->value); > + continue; > + } > + } > + > + metadata_cfg->data =3D s->cb; > + metadata_cfg->callback =3D &set_meta; > + > + err =3D vmaf_register_metadata_handler(s->vmaf, *metadata_cfg); > + if (err) { > + av_log(ctx, AV_LOG_ERROR, > + "problem 
during vmaf_register_metadata_handler: %s\n"= , > + feature_name); > + goto exit; > + } > + > + s->metadata_cfg_list.metadata_cfgs =3D av_realloc(s->metadata_cf= g_list.metadata_cfgs, > + (s->metadata_cfg_list.metad= ata_cfg_cnt + 1) * > + sizeof(*s->metadata_cfg_lis= t.metadata_cfgs)); > + if (!s->metadata_cfg_list.metadata_cfgs) { > + err =3D AVERROR(ENOMEM); > + goto exit; > + } > + > + s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata= _cfg_cnt++] =3D *metadata_cfg; > + } > + > +exit: > + for (unsigned i =3D 0; i < dict_cnt; i++) { > + if (dict[i]) > + av_dict_free(&dict[i]); > + } > + av_free(dict); > + return err; > +} > + > +static int init_metadata(AVFilterContext *ctx) > +{ > + LIBVMAFContext *s =3D ctx->priv; > + > + s->cb =3D av_calloc(1, sizeof(CallbackStruct)); > + if (!s->cb) > + return AVERROR(ENOMEM); > + > + s->cb->s =3D s; > + > + return 0; > +} > +#endif > + > static enum VmafLogLevel log_level_map(int log_level) > { > switch (log_level) { > @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx) > if (err) > return AVERROR(EINVAL); > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + err =3D init_metadata(ctx); > + if (err) > + return err; > + > + err =3D parse_metadata_handlers(ctx); > + if (err) > + return err; > +#endif > + > err =3D parse_models(ctx); > if (err) > return err; > @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink) > static int activate(AVFilterContext *ctx) > { > LIBVMAFContext *s =3D ctx->priv; > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + // There are 2 cases for metadata propagation: > + // 1. Where the case that outlink closes > + // 2. Where inlink closes > + // Case 1: > + // In this case we need check outlink somehow for the status in ev= ery iteration. > + // If outlink is not wanting frame anymore, we need to proceed wit= h uninit with setting inlink. 
> + // But nature of multithreading settting eof inside the activate c= all can make sync issues and > + // can lead to extra propagated frames. Atomic variables are used = to avoid this. > + // Case 2: > + // This case relatively easy to handle. Because of calculation of = vmaf score takes time > + // So `do_vmaf` buffers many of frames before sending to outlink t= hat causes > + // premature close of outlink. > + // Checking inlink status is enough and if inlink =3D=3D eof flush= ing vmaf is enough for this. > + int64_t pts; > + int status, ret =3D 0; > + > + if (ff_outlink_get_status(ctx->outputs[0])) > + s->outlink_eof =3D 1; > + > + if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) && > + ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) { > + if (!s->flushed) { > + ret =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0); > + if (ret) > + av_log(ctx, AV_LOG_ERROR, > + "problem flushing libvmaf context.\n"); > + else > + s->flushed =3D 1; > + } > + } > +#endif > return ff_framesync_activate(&s->fs); > } > > @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx) > LIBVMAFContext *s =3D ctx->priv; > int err =3D 0; > > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + if (!s->outlink_eof) > + s->outlink_eof =3D 1; > +#endif > + > ff_framesync_uninit(&s->fs); > > if (!s->frame_cnt) > goto clean_up; > > - err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0); > + if (!s->flushed) { > + err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0); > + if (err) { > + av_log(ctx, AV_LOG_ERROR, > + "problem flushing libvmaf context.\n"); > + } else > + s->flushed =3D 1; > + } > + > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + if (s->metadata_cfg_list.metadata_cfgs) { > + for (unsigned i =3D 0; i < s->metadata_cfg_list.metadata_cfg_cnt= ; i++) { > + av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name); > + } > + av_free(s->metadata_cfg_list.metadata_cfgs); > + } > + > + err =3D free_frame_list(&s->cb->frame_list); > if (err) { > av_log(ctx, AV_LOG_ERROR, 
> - "problem flushing libvmaf context.\n"); > + "problem freeing frame list.\n"); > } > +#endif > > for (unsigned i =3D 0; i < s->model_cnt; i++) { > double vmaf_score; > + > +#if CONFIG_LIBVMAF_METADATA_ENABLED > + err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(= s->pool), > + &vmaf_score, 0, s->eof_frame); > + av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_fra= me, s->frame_cnt - 1); > +#else > err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(= s->pool), > &vmaf_score, 0, s->frame_cnt - 1); > +#endif > + > if (err) { > av_log(ctx, AV_LOG_ERROR, > "problem getting pooled vmaf score.\n"); > -- > 2.45.2 > Thanks, Kyle
Hi, Thanks for the feedback! > On Aug 30, 2024, at 7:44 AM, Kyle Swanson <k@ylo.ph> wrote: > > Hi, > > > On Mon, Aug 26, 2024 at 10:51 AM Yigithan Yigit > <yigithanyigitdevel@gmail.com <mailto:yigithanyigitdevel@gmail.com>> wrote: >> >> --- >> libavfilter/vf_libvmaf.c | 328 ++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 326 insertions(+), 2 deletions(-) >> >> diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c >> index f655092b20..e6707aff53 100644 >> --- a/libavfilter/vf_libvmaf.c >> +++ b/libavfilter/vf_libvmaf.c >> @@ -27,8 +27,11 @@ >> #include "config_components.h" >> >> #include <libvmaf.h> >> +#include <libvmaf/version.h> >> >> #include "libavutil/avstring.h" >> +#include "libavutil/dict.h" >> +#include "libavutil/frame.h" >> #include "libavutil/mem.h" >> #include "libavutil/opt.h" >> #include "libavutil/pixdesc.h" >> @@ -46,6 +49,31 @@ >> #include "libavutil/hwcontext_cuda_internal.h" >> #endif >> >> +#define VMAF_VERSION_INT_VER(major, minor, patch) \ >> + ((major) * 10000 + (minor) * 100 + (patch)) >> + >> +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR, VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0) >> +#define CONFIG_LIBVMAF_METADATA_ENABLED 1 >> +#else >> +#define CONFIG_LIBVMAF_METADATA_ENABLED 0 >> +#endif > > You should be able to check pkg_cfg and set this > CONFIG_LIBVMAF_METADATA_ENABLED define from the configure script. Fixed locally.
> >> + >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> +#include <stdatomic.h> >> + >> +typedef struct FrameList { >> + AVFrame *frame; >> + unsigned frame_number; >> + unsigned propagated_handlers_cnt; >> + struct FrameList *next; >> +} FrameList; >> + >> +typedef struct CallbackStruct { >> + struct LIBVMAFContext *s; >> + FrameList *frame_list; >> +} CallbackStruct; >> +#endif >> + >> typedef struct LIBVMAFContext { >> const AVClass *class; >> FFFrameSync fs; >> @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext { >> int n_subsample; >> char *model_cfg; >> char *feature_cfg; >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + char *metadata_feature_cfg; >> + struct { >> + VmafMetadataConfiguration *metadata_cfgs; >> + unsigned metadata_cfg_cnt; >> + } metadata_cfg_list; >> + CallbackStruct *cb; >> + atomic_uint outlink_eof; >> + atomic_uint eof_frame; >> +#endif >> VmafContext *vmaf; >> VmafModel **model; >> + int flushed; >> unsigned model_cnt; >> unsigned frame_cnt; >> unsigned bpc; >> @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] =3D { >> {"n_subsample", "Set interval for frame subsampling used when comput= > ing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=3D1}, 1, UINT_M= > AX, FLAGS}, >> {"model", "Set the model to be used for computing vmaf.", = > OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str=3D"version=3Dv= > maf_v0.6.1"}, 0, 1, FLAGS}, >> {"feature", "Set the feature to be used for computing vmaf.", = > OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=3DNULL}, 0, = > 1, FLAGS}, >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + {"metadata_handler", "Set the feature to be propagated as metadata.= > ", OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str=3D"= > name=3Dvmaf"}, 0, 1, FLAGS}, > > Would be better to make this option a bool. When true, propagate all > registered features and models. You can read the names during init, > they should be available inside `parse_models()` and > `parse_features()`. Yes, but we design vmaf api for individual metrics. 
Using an identifier doesn’t work, unfortunately, and as far as I know there is no API for accessing individual features by identifier. However, I made a small patch for that, which makes the API usable for more generic use cases: https://github.com/Netflix/vmaf/pull/1387 > >> +#endif >> { NULL } >> }; >> >> @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixe= > lFormat av_pix_fmt) >> } >> } >> >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned = > frame_number) >> +{ >> + FrameList *new_frame =3D av_malloc(sizeof(FrameList)); >> + if (!new_frame) >> + return AVERROR(ENOMEM); >> + >> + new_frame->frame =3D frame; >> + new_frame->frame_number =3D frame_number; >> + new_frame->propagated_handlers_cnt =3D 0; >> + new_frame->next =3D NULL; >> + >> + if (*head =3D=3D NULL) { >> + *head =3D new_frame; >> + } else { >> + FrameList *current =3D *head; >> + while (current->next !=3D NULL) { >> + current =3D current->next; >> + } >> + current->next =3D new_frame; >> + } >> + >> + return 0; >> +} >> + >> +static int remove_from_frame_list(FrameList **frame_list, unsigned frame= > _number) >> +{ >> + FrameList *cur =3D *frame_list; >> + FrameList *prev =3D NULL; >> + >> + while (cur) { >> + if (cur->frame_number =3D=3D frame_number) { >> + if (prev) >> + prev->next =3D cur->next; >> + else >> + *frame_list =3D cur->next; >> + av_free(cur); >> + return 0; >> + } >> + prev =3D cur; >> + cur =3D cur->next; >> + } >> + >> + return AVERROR(EINVAL); >> +} >> + >> +static int free_frame_list(FrameList **frame_list) >> +{ >> + FrameList *cur =3D *frame_list; >> + while (cur) { >> + FrameList *next =3D cur->next; >> + av_frame_free(&cur->frame); >> + av_free(cur); >> + cur =3D next; >> + } >> + *frame_list =3D NULL; >> + return 0; >> +} >> + >> +static FrameList* get_frame_from_frame_list(FrameList *frame_list, >> + unsigned frame_number) >> +{ >> + FrameList *cur =3D frame_list; >> + while (cur) { >> + if
(cur->frame_number =3D=3D frame_number) >> + return cur; >> + cur =3D cur->next; >> + } >> + return NULL; >> +} >> + > > Would be great if we didn't need to invent a data structure here. I > guess av_fifo is no good here because metadata callbacks are not > guaranteed to come in order? Yes, metadata callbacks are not coming in order. I looked libavutil and I couldn’t find a data structure for this use case. I am open to suggestions If we don’t prefer to use this data structure. > >> +static void set_meta(void *data, VmafMetadata *metadata) >> +{ >> + int err =3D 0; >> + FrameList *current_frame =3D NULL; >> + CallbackStruct *cb =3D data; >> + char value[128], key[128]; >> + snprintf(value, sizeof(value), "%0.2f", metadata->score); >> + snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata= > ->picture_index); >> + >> + current_frame =3D get_frame_from_frame_list(cb->frame_list, metadata= > ->picture_index); >> + if (!current_frame) { >> + av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\= > n", >> + metadata->picture_index); >> + return; >> + } >> + >> + err =3D av_dict_set(¤t_frame->frame->metadata, key, value, 0); >> + if (err < 0) >> + av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key); >> + >> + current_frame->propagated_handlers_cnt++; >> + >> + if (current_frame->propagated_handlers_cnt =3D=3D cb->s->metadata_cf= > g_list.metadata_cfg_cnt) { >> + FrameList *cur =3D cb->frame_list; >> + // This code block allows to send frames monotonically >> + while(cur && cur->frame_number <=3D metadata->picture_index) { >> + if (cur->propagated_handlers_cnt =3D=3D cb->s->metadata_cfg_= > list.metadata_cfg_cnt) { >> + FrameList *next; >> + // Check outlink is closed >> + if (!cb->s->outlink_eof) { >> + av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature= > : %d, score: %f\n", cur->frame_number, metadata->score); >> + cb->s->eof_frame =3D cur->frame_number; >> + if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur= > ->frame)) >> + 
return; >> + } >> + next =3D cur->next; >> + remove_from_frame_list(&cb->frame_list, cur->frame_numbe= > r); >> + cur =3D next; >> + } >> + else >> + break; >> + } >> + } >> +} >> +#endif >> + >> static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bp= > c) >> { >> const int bytes_per_value =3D bpc > 8 ? 2 : 1; >> @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs) >> return AVERROR(ENOMEM); >> } >> >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + err =3D add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt); >> + if (err) { >> + av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n"); >> + return AVERROR(ENOMEM); >> + } >> +#endif >> + >> err =3D vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cn= > t++); >> if (err) { >> av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n"); >> return AVERROR(EINVAL); >> } >> >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + if (s->metadata_cfg_list.metadata_cfg_cnt) >> + return 0; >> + else >> + return ff_filter_frame(ctx->outputs[0], dist); >> +#else >> return ff_filter_frame(ctx->outputs[0], dist); >> +#endif >> } >> >> static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt) >> @@ -408,6 +582,83 @@ exit: >> return err; >> } >> >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> +static int parse_metadata_handlers(AVFilterContext *ctx) >> +{ >> + LIBVMAFContext *s =3D ctx->priv; >> + AVDictionary **dict; >> + unsigned dict_cnt; >> + int err =3D 0; >> + >> + if (!s->metadata_feature_cfg) >> + return 0; >> + >> + dict_cnt =3D 0; >> + dict =3D delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt); >> + if (!dict) { >> + av_log(ctx, AV_LOG_ERROR, >> + "could not parse metadata feature config: %s\n", >> + s->metadata_feature_cfg); >> + return AVERROR(EINVAL); >> + } >> + >> + for (unsigned i =3D 0; i < dict_cnt; i++) { >> + VmafMetadataConfiguration *metadata_cfg =3D av_calloc(1, sizeof(= > *metadata_cfg)); >> + const AVDictionaryEntry *e =3D NULL; >> + char *feature_name =3D NULL; 
>> + >> + while (e =3D av_dict_iterate(dict[i], e)) { >> + if (!strcmp(e->key, "name")) { >> + metadata_cfg->feature_name =3D av_strdup(e->value); >> + continue; >> + } >> + } >> + >> + metadata_cfg->data =3D s->cb; >> + metadata_cfg->callback =3D &set_meta; >> + >> + err =3D vmaf_register_metadata_handler(s->vmaf, *metadata_cfg); >> + if (err) { >> + av_log(ctx, AV_LOG_ERROR, >> + "problem during vmaf_register_metadata_handler: %s\n"= > , >> + feature_name); >> + goto exit; >> + } >> + >> + s->metadata_cfg_list.metadata_cfgs =3D av_realloc(s->metadata_cf= > g_list.metadata_cfgs, >> + (s->metadata_cfg_list.metad= > ata_cfg_cnt + 1) * >> + sizeof(*s->metadata_cfg_lis= > t.metadata_cfgs)); >> + if (!s->metadata_cfg_list.metadata_cfgs) { >> + err =3D AVERROR(ENOMEM); >> + goto exit; >> + } >> + >> + s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata= > _cfg_cnt++] =3D *metadata_cfg; >> + } >> + >> +exit: >> + for (unsigned i =3D 0; i < dict_cnt; i++) { >> + if (dict[i]) >> + av_dict_free(&dict[i]); >> + } >> + av_free(dict); >> + return err; >> +} >> + >> +static int init_metadata(AVFilterContext *ctx) >> +{ >> + LIBVMAFContext *s =3D ctx->priv; >> + >> + s->cb =3D av_calloc(1, sizeof(CallbackStruct)); >> + if (!s->cb) >> + return AVERROR(ENOMEM); >> + >> + s->cb->s =3D s; >> + >> + return 0; >> +} >> +#endif >> + >> static enum VmafLogLevel log_level_map(int log_level) >> { >> switch (log_level) { >> @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx) >> if (err) >> return AVERROR(EINVAL); >> >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + err =3D init_metadata(ctx); >> + if (err) >> + return err; >> + >> + err =3D parse_metadata_handlers(ctx); >> + if (err) >> + return err; >> +#endif >> + >> err =3D parse_models(ctx); >> if (err) >> return err; >> @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink) >> static int activate(AVFilterContext *ctx) >> { >> LIBVMAFContext *s =3D ctx->priv; >> +#if CONFIG_LIBVMAF_METADATA_ENABLED 
>> + // There are 2 cases for metadata propagation: >> + // 1. Where the case that outlink closes >> + // 2. Where inlink closes >> + // Case 1: >> + // In this case we need check outlink somehow for the status in ev= > ery iteration. >> + // If outlink is not wanting frame anymore, we need to proceed wit= > h uninit with setting inlink. >> + // But nature of multithreading settting eof inside the activate c= > all can make sync issues and >> + // can lead to extra propagated frames. Atomic variables are used = > to avoid this. >> + // Case 2: >> + // This case relatively easy to handle. Because of calculation of = > vmaf score takes time >> + // So `do_vmaf` buffers many of frames before sending to outlink t= > hat causes >> + // premature close of outlink. >> + // Checking inlink status is enough and if inlink =3D=3D eof flush= > ing vmaf is enough for this. >> + int64_t pts; >> + int status, ret =3D 0; >> + >> + if (ff_outlink_get_status(ctx->outputs[0])) >> + s->outlink_eof =3D 1; >> + >> + if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) && >> + ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) { >> + if (!s->flushed) { >> + ret =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0); >> + if (ret) >> + av_log(ctx, AV_LOG_ERROR, >> + "problem flushing libvmaf context.\n"); >> + else >> + s->flushed =3D 1; >> + } >> + } >> +#endif >> return ff_framesync_activate(&s->fs); >> } >> >> @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx) >> LIBVMAFContext *s =3D ctx->priv; >> int err =3D 0; >> >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + if (!s->outlink_eof) >> + s->outlink_eof =3D 1; >> +#endif >> + >> ff_framesync_uninit(&s->fs); >> >> if (!s->frame_cnt) >> goto clean_up; >> >> - err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0); >> + if (!s->flushed) { >> + err =3D vmaf_read_pictures(s->vmaf, NULL, NULL, 0); >> + if (err) { >> + av_log(ctx, AV_LOG_ERROR, >> + "problem flushing libvmaf context.\n"); >> + } else >> + s->flushed 
=3D 1; >> + } >> + >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + if (s->metadata_cfg_list.metadata_cfgs) { >> + for (unsigned i =3D 0; i < s->metadata_cfg_list.metadata_cfg_cnt= > ; i++) { >> + av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name); >> + } >> + av_free(s->metadata_cfg_list.metadata_cfgs); >> + } >> + >> + err =3D free_frame_list(&s->cb->frame_list); >> if (err) { >> av_log(ctx, AV_LOG_ERROR, >> - "problem flushing libvmaf context.\n"); >> + "problem freeing frame list.\n"); >> } >> +#endif >> >> for (unsigned i =3D 0; i < s->model_cnt; i++) { >> double vmaf_score; >> + >> +#if CONFIG_LIBVMAF_METADATA_ENABLED >> + err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(= > s->pool), >> + &vmaf_score, 0, s->eof_frame); >> + av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_fra= > me, s->frame_cnt - 1); >> +#else >> err =3D vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(= > s->pool), >> &vmaf_score, 0, s->frame_cnt - 1); >> +#endif >> + >> if (err) { >> av_log(ctx, AV_LOG_ERROR, >> "problem getting pooled vmaf score.\n"); >> -- >> 2.45.2 >> > > Thanks, > Kyle Thanks, Yigithan
Hi,
On Wed, Sep 11, 2024 at 5:41 PM Yigithan Yigit
<yigithanyigitdevel@gmail.com> wrote:
> Yes, metadata callbacks are not coming in order. I looked through libavutil and I couldn’t find a data structure for this use case. I am open to suggestions if we prefer not to use this data structure.
Could we somehow get the AVFrame pointer in your custom callback data
or the VmafMetadata data? When all of the expected metadata is written
then you'll know you're ready to forward the frame through the
filtergraph. Doing it this way hopefully avoids a lot of the code and
traversing through linked lists looking for AVFrame pointers as you've
done.
Thanks,
Kyle
Hi, > Could we somehow get the AVFrame pointer in your custom callback data > or the VmafMetadata data? When all of the expected metadata is written > then you'll know you're ready to forward the frame through the > filtergraph. Doing it this way hopefully avoids a lot of the code and > traversing through linked lists looking for AVFrame pointers as you've > done. Thanks for the suggestion! I think the main difficulty of the implementation is sending the frames, which come back from libvmaf non-monotonically, in order. Tracking the written metadata is not enough, in my opinion: we need some kind of structure that puts the frames in order before they are sent, or some other way to send them in order. For the past couple of days I have been working on a solution similar to the linked-list one, but using av_fifo instead of my own implementation. It seems to be working fine. Tests are welcome. https://github.com/yigithanyigit/FFmpeg/pull/3/files NOTE: This implementation uses the small vmaf patch that I mentioned earlier in the thread. Thanks, Yigithan
diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c index f655092b20..e6707aff53 100644 --- a/libavfilter/vf_libvmaf.c +++ b/libavfilter/vf_libvmaf.c @@ -27,8 +27,11 @@ #include "config_components.h" #include <libvmaf.h> +#include <libvmaf/version.h> #include "libavutil/avstring.h" +#include "libavutil/dict.h" +#include "libavutil/frame.h" #include "libavutil/mem.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -46,6 +49,31 @@ #include "libavutil/hwcontext_cuda_internal.h" #endif +#define VMAF_VERSION_INT_VER(major, minor, patch) \ + ((major) * 10000 + (minor) * 100 + (patch)) + +#if VMAF_VERSION_INT_VER(VMAF_API_VERSION_MAJOR, VMAF_API_VERSION_MINOR, VMAF_API_VERSION_PATCH) > VMAF_VERSION_INT_VER(3, 0, 0) +#define CONFIG_LIBVMAF_METADATA_ENABLED 1 +#else +#define CONFIG_LIBVMAF_METADATA_ENABLED 0 +#endif + +#if CONFIG_LIBVMAF_METADATA_ENABLED +#include <stdatomic.h> + +typedef struct FrameList { + AVFrame *frame; + unsigned frame_number; + unsigned propagated_handlers_cnt; + struct FrameList *next; +} FrameList; + +typedef struct CallbackStruct { + struct LIBVMAFContext *s; + FrameList *frame_list; +} CallbackStruct; +#endif + typedef struct LIBVMAFContext { const AVClass *class; FFFrameSync fs; @@ -56,8 +84,19 @@ typedef struct LIBVMAFContext { int n_subsample; char *model_cfg; char *feature_cfg; +#if CONFIG_LIBVMAF_METADATA_ENABLED + char *metadata_feature_cfg; + struct { + VmafMetadataConfiguration *metadata_cfgs; + unsigned metadata_cfg_cnt; + } metadata_cfg_list; + CallbackStruct *cb; + atomic_uint outlink_eof; + atomic_uint eof_frame; +#endif VmafContext *vmaf; VmafModel **model; + int flushed; unsigned model_cnt; unsigned frame_cnt; unsigned bpc; @@ -77,6 +116,9 @@ static const AVOption libvmaf_options[] = { {"n_subsample", "Set interval for frame subsampling used when computing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS}, {"model", "Set the model to be used for computing vmaf.", OFFSET(model_cfg), 
AV_OPT_TYPE_STRING, {.str="version=vmaf_v0.6.1"}, 0, 1, FLAGS}, {"feature", "Set the feature to be used for computing vmaf.", OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS}, +#if CONFIG_LIBVMAF_METADATA_ENABLED + {"metadata_handler", "Set the feature to be propagated as metadata.", OFFSET(metadata_feature_cfg), AV_OPT_TYPE_STRING, {.str="name=vmaf"}, 0, 1, FLAGS}, +#endif { NULL } }; @@ -105,6 +147,123 @@ static enum VmafPixelFormat pix_fmt_map(enum AVPixelFormat av_pix_fmt) } } +#if CONFIG_LIBVMAF_METADATA_ENABLED +static int add_to_frame_list(FrameList **head, AVFrame *frame, unsigned frame_number) +{ + FrameList *new_frame = av_malloc(sizeof(FrameList)); + if (!new_frame) + return AVERROR(ENOMEM); + + new_frame->frame = frame; + new_frame->frame_number = frame_number; + new_frame->propagated_handlers_cnt = 0; + new_frame->next = NULL; + + if (*head == NULL) { + *head = new_frame; + } else { + FrameList *current = *head; + while (current->next != NULL) { + current = current->next; + } + current->next = new_frame; + } + + return 0; +} + +static int remove_from_frame_list(FrameList **frame_list, unsigned frame_number) +{ + FrameList *cur = *frame_list; + FrameList *prev = NULL; + + while (cur) { + if (cur->frame_number == frame_number) { + if (prev) + prev->next = cur->next; + else + *frame_list = cur->next; + av_free(cur); + return 0; + } + prev = cur; + cur = cur->next; + } + + return AVERROR(EINVAL); +} + +static int free_frame_list(FrameList **frame_list) +{ + FrameList *cur = *frame_list; + while (cur) { + FrameList *next = cur->next; + av_frame_free(&cur->frame); + av_free(cur); + cur = next; + } + *frame_list = NULL; + return 0; +} + +static FrameList* get_frame_from_frame_list(FrameList *frame_list, + unsigned frame_number) +{ + FrameList *cur = frame_list; + while (cur) { + if (cur->frame_number == frame_number) + return cur; + cur = cur->next; + } + return NULL; +} + +static void set_meta(void *data, VmafMetadata *metadata) +{ + int 
err = 0; + FrameList *current_frame = NULL; + CallbackStruct *cb = data; + char value[128], key[128]; + snprintf(value, sizeof(value), "%0.2f", metadata->score); + snprintf(key, sizeof(key), "%s.%d", metadata->feature_name, metadata->picture_index); + + current_frame = get_frame_from_frame_list(cb->frame_list, metadata->picture_index); + if (!current_frame) { + av_log(NULL, AV_LOG_ERROR, "could not find frame with index: %d\n", + metadata->picture_index); + return; + } + + err = av_dict_set(&current_frame->frame->metadata, key, value, 0); + if (err < 0) + av_log(NULL, AV_LOG_ERROR, "could not set metadata: %s\n", key); + + current_frame->propagated_handlers_cnt++; + + if (current_frame->propagated_handlers_cnt == cb->s->metadata_cfg_list.metadata_cfg_cnt) { + FrameList *cur = cb->frame_list; + // This code block allows to send frames monotonically + while(cur && cur->frame_number <= metadata->picture_index) { + if (cur->propagated_handlers_cnt == cb->s->metadata_cfg_list.metadata_cfg_cnt) { + FrameList *next; + // Check outlink is closed + if (!cb->s->outlink_eof) { + av_log(cb->s->fs.parent, AV_LOG_DEBUG, "VMAF feature: %d, score: %f\n", cur->frame_number, metadata->score); + cb->s->eof_frame = cur->frame_number; + if(ff_filter_frame(cb->s->fs.parent->outputs[0], cur->frame)) + return; + } + next = cur->next; + remove_from_frame_list(&cb->frame_list, cur->frame_number); + cur = next; + } + else + break; + } + } +} +#endif + static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bpc) { const int bytes_per_value = bpc > 8 ? 
2 : 1; @@ -160,13 +319,28 @@ static int do_vmaf(FFFrameSync *fs) return AVERROR(ENOMEM); } +#if CONFIG_LIBVMAF_METADATA_ENABLED + err = add_to_frame_list(&s->cb->frame_list, dist, s->frame_cnt); + if (err) { + av_log(s, AV_LOG_ERROR, "problem during add_to_frame_list.\n"); + return AVERROR(ENOMEM); + } +#endif + err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++); if (err) { av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n"); return AVERROR(EINVAL); } +#if CONFIG_LIBVMAF_METADATA_ENABLED + if (s->metadata_cfg_list.metadata_cfg_cnt) + return 0; + else + return ff_filter_frame(ctx->outputs[0], dist); +#else return ff_filter_frame(ctx->outputs[0], dist); +#endif } static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt) @@ -408,6 +582,83 @@ exit: return err; } +#if CONFIG_LIBVMAF_METADATA_ENABLED +static int parse_metadata_handlers(AVFilterContext *ctx) +{ + LIBVMAFContext *s = ctx->priv; + AVDictionary **dict; + unsigned dict_cnt; + int err = 0; + + if (!s->metadata_feature_cfg) + return 0; + + dict_cnt = 0; + dict = delimited_dict_parse(s->metadata_feature_cfg, &dict_cnt); + if (!dict) { + av_log(ctx, AV_LOG_ERROR, + "could not parse metadata feature config: %s\n", + s->metadata_feature_cfg); + return AVERROR(EINVAL); + } + + for (unsigned i = 0; i < dict_cnt; i++) { + VmafMetadataConfiguration *metadata_cfg = av_calloc(1, sizeof(*metadata_cfg)); + const AVDictionaryEntry *e = NULL; + char *feature_name = NULL; + + while (e = av_dict_iterate(dict[i], e)) { + if (!strcmp(e->key, "name")) { + metadata_cfg->feature_name = av_strdup(e->value); + continue; + } + } + + metadata_cfg->data = s->cb; + metadata_cfg->callback = &set_meta; + + err = vmaf_register_metadata_handler(s->vmaf, *metadata_cfg); + if (err) { + av_log(ctx, AV_LOG_ERROR, + "problem during vmaf_register_metadata_handler: %s\n", + feature_name); + goto exit; + } + + s->metadata_cfg_list.metadata_cfgs = av_realloc(s->metadata_cfg_list.metadata_cfgs, + 
(s->metadata_cfg_list.metadata_cfg_cnt + 1) * + sizeof(*s->metadata_cfg_list.metadata_cfgs)); + if (!s->metadata_cfg_list.metadata_cfgs) { + err = AVERROR(ENOMEM); + goto exit; + } + + s->metadata_cfg_list.metadata_cfgs[s->metadata_cfg_list.metadata_cfg_cnt++] = *metadata_cfg; + } + +exit: + for (unsigned i = 0; i < dict_cnt; i++) { + if (dict[i]) + av_dict_free(&dict[i]); + } + av_free(dict); + return err; +} + +static int init_metadata(AVFilterContext *ctx) +{ + LIBVMAFContext *s = ctx->priv; + + s->cb = av_calloc(1, sizeof(CallbackStruct)); + if (!s->cb) + return AVERROR(ENOMEM); + + s->cb->s = s; + + return 0; +} +#endif + static enum VmafLogLevel log_level_map(int log_level) { switch (log_level) { @@ -441,6 +692,16 @@ static av_cold int init(AVFilterContext *ctx) if (err) return AVERROR(EINVAL); +#if CONFIG_LIBVMAF_METADATA_ENABLED + err = init_metadata(ctx); + if (err) + return err; + + err = parse_metadata_handlers(ctx); + if (err) + return err; +#endif + err = parse_models(ctx); if (err) return err; @@ -518,6 +779,38 @@ static int config_output(AVFilterLink *outlink) static int activate(AVFilterContext *ctx) { LIBVMAFContext *s = ctx->priv; +#if CONFIG_LIBVMAF_METADATA_ENABLED + // There are 2 cases for metadata propagation: + // 1. Where the case that outlink closes + // 2. Where inlink closes + // Case 1: + // In this case we need check outlink somehow for the status in every iteration. + // If outlink is not wanting frame anymore, we need to proceed with uninit with setting inlink. + // But nature of multithreading settting eof inside the activate call can make sync issues and + // can lead to extra propagated frames. Atomic variables are used to avoid this. + // Case 2: + // This case relatively easy to handle. Because of calculation of vmaf score takes time + // So `do_vmaf` buffers many of frames before sending to outlink that causes + // premature close of outlink. 
+ // Checking inlink status is enough and if inlink == eof flushing vmaf is enough for this. + int64_t pts; + int status, ret = 0; + + if (ff_outlink_get_status(ctx->outputs[0])) + s->outlink_eof = 1; + + if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts) && + ff_inlink_acknowledge_status(ctx->inputs[1], &status, &pts)) { + if (!s->flushed) { + ret = vmaf_read_pictures(s->vmaf, NULL, NULL, 0); + if (ret) + av_log(ctx, AV_LOG_ERROR, + "problem flushing libvmaf context.\n"); + else + s->flushed = 1; + } + } +#endif return ff_framesync_activate(&s->fs); } @@ -556,21 +849,52 @@ static av_cold void uninit(AVFilterContext *ctx) LIBVMAFContext *s = ctx->priv; int err = 0; +#if CONFIG_LIBVMAF_METADATA_ENABLED + if (!s->outlink_eof) + s->outlink_eof = 1; +#endif + ff_framesync_uninit(&s->fs); if (!s->frame_cnt) goto clean_up; - err = vmaf_read_pictures(s->vmaf, NULL, NULL, 0); + if (!s->flushed) { + err = vmaf_read_pictures(s->vmaf, NULL, NULL, 0); + if (err) { + av_log(ctx, AV_LOG_ERROR, + "problem flushing libvmaf context.\n"); + } else + s->flushed = 1; + } + +#if CONFIG_LIBVMAF_METADATA_ENABLED + if (s->metadata_cfg_list.metadata_cfgs) { + for (unsigned i = 0; i < s->metadata_cfg_list.metadata_cfg_cnt; i++) { + av_free(s->metadata_cfg_list.metadata_cfgs[i].feature_name); + } + av_free(s->metadata_cfg_list.metadata_cfgs); + } + + err = free_frame_list(&s->cb->frame_list); if (err) { av_log(ctx, AV_LOG_ERROR, - "problem flushing libvmaf context.\n"); + "problem freeing frame list.\n"); } +#endif for (unsigned i = 0; i < s->model_cnt; i++) { double vmaf_score; + +#if CONFIG_LIBVMAF_METADATA_ENABLED + err = vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(s->pool), + &vmaf_score, 0, s->eof_frame); + av_log(ctx, AV_LOG_DEBUG, "frame: %d frame_cnt %d\n", s->eof_frame, s->frame_cnt - 1); +#else err = vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(s->pool), &vmaf_score, 0, s->frame_cnt - 1); +#endif + if (err) { av_log(ctx, AV_LOG_ERROR, "problem 
getting pooled vmaf score.\n");