diff mbox series

[FFmpeg-devel,WIP] avisynth: support variable fps

Message ID 20241006220238.16196-1-qyot27@gmail.com
State New
Headers show
Series [FFmpeg-devel,WIP] avisynth: support variable fps | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Stephen Hutchinson Oct. 6, 2024, 10:02 p.m. UTC
Draft #1.  For testing reasons, the patch turns on vfr mode
by default so it can be tested more quickly.  The seeking
issues described below are problematic enough that this
default would be reversed when actually committed, unless
they can be satisfactorily resolved before then.

Currently has only been tested with GCC 13 (Ubuntu) and 14 (mingw-w64).

What works:
* Use _DurationNum and _DurationDen frame properties (if present)
  to set the pkt->[pts|dts|duration].
* If the properties are not present, fall back to the legacy
  CFR mode, as well as allow users to switch back by choice with
  the existing -avisynth_flags option ('-avisynth_flags -vfr').
* Transcoding to other formats (ex. ffv1 in mkv) retains the
  adjusted packet durations correctly.

What doesn't work:
* As reported by ffprobe, the original mkv I was using as a
  test file has an offset between the pts and dts, as if the
  first couple of frames get skipped. While the logic *does*
  perform a similar enough skew to make the audio sync correct,
  the original mkv lists the dtses as N/A.
* Even though the ptses match that of the original mkv file,
  transcoding into another mkv reports back 'starting new
  cluster due to timestamp' warnings, and the ffmpeg-created
  mkv file has some strange audio seek behavior.  Remuxing
  with mkvmerge fixes it, so I'm not sure if this is really
  something to worry about with the avisynth demuxer or not.
* Seeking is broken.  The original file can seek without issues,
  and instantaneously.  Files transcoded from the script act far
  more like the original file, if not identically to it.  Seeking
  *forward* in the script sometimes works, but is slow, and seeking
  backward is completely non-functional.  The size of the seeks
  is also seemingly unpredictable; a minute or two in one area,
  or 15 minutes the next.

Errata:
* There are currently entries added to the AviSynthContext struct
  to calculate the frame's framerate (avs->curr_fps[num|den]).
  These were used in some experiments, but didn't lead to any fixes
  in the short term. I don't know whether they could still be put
  to some genuinely useful purpose, which is why they are still
  here in the draft.
* It occurs to me that comparing the seek behavior with mkv (or
  any other container capable of variable frame durations) is
  potentially like comparing apples and oranges because those
  containers have additional structures (e.g. an index) that
  handle some of the load of making sure seeking can be done
  accurately by their corresponding demuxers.  This would mean
  that such an index would need to be present for the script
  to actually get functional seeking in vfr mode: whether by
  it being in the frame properties or - at worst - exposed by
  the AviSynth+ core itself through an api function or entirely
  new structure, in which case vfr-mode seeking would have to
  be shelved until those things appear.
* Clearly, if some of the things (all the scaledur/curr_* math)
  could be achieved with pre-existing functions, that would be
  better than having to handle those calculations the way I did.
  That's why this is a draft.
---
 libavformat/avisynth.c | 95 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 82 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
index cb2be10925..958e880407 100644
--- a/libavformat/avisynth.c
+++ b/libavformat/avisynth.c
@@ -53,6 +53,7 @@ 
 #endif
 
 #include <avisynth/avisynth_c.h>
+#include <stdbool.h>
 
 typedef struct AviSynthLibrary {
     void *library;
@@ -96,6 +97,7 @@  typedef enum AviSynthFlags {
     AVISYNTH_FRAMEPROP_MATRIX = (1 << 4),
     AVISYNTH_FRAMEPROP_CHROMA_LOCATION = (1 << 5),
     AVISYNTH_FRAMEPROP_SAR = (1 << 6),
+    AVISYNTH_FRAMEPROP_VFR = (1 << 7),
 } AviSynthFlags;
 
 typedef struct AviSynthContext {
@@ -115,6 +117,15 @@  typedef struct AviSynthContext {
     int error;
 
     uint32_t flags;
+    bool is_vfr;
+    int scaled_durnum;
+    int scaled_durden;
+
+    int curr_duration;
+    int total_duration;
+    int curr_fpsnum;
+    int curr_fpsden;
+
     struct AviSynthLibrary avs_library;
 } AviSynthContext;
 
@@ -230,13 +241,6 @@  static int avisynth_create_stream_video(AVFormatContext *s, AVStream *st)
     st->codecpar->width      = avs->vi->width;
     st->codecpar->height     = avs->vi->height;
 
-    st->avg_frame_rate    = (AVRational) { avs->vi->fps_numerator,
-                                           avs->vi->fps_denominator };
-    st->start_time        = 0;
-    st->duration          = avs->vi->num_frames;
-    st->nb_frames         = avs->vi->num_frames;
-    avpriv_set_pts_info(st, 32, avs->vi->fps_denominator, avs->vi->fps_numerator);
-
 
     switch (avs->vi->pixel_type) {
     /* 10~16-bit YUV pix_fmts (AviSynth+) */
@@ -721,6 +725,21 @@  static int avisynth_create_stream_video(AVFormatContext *s, AVStream *st)
             st->sample_aspect_ratio = (AVRational){ sar_num, sar_den };
         }
 
+        /* Variable frame rate */
+        if(avs->flags & AVISYNTH_FRAMEPROP_VFR) {
+            if((avs->avs_library.avs_prop_get_type(avs->env, avsmap, "_DurationDen") == AVS_PROPTYPE_UNSET) ||
+               (avs->avs_library.avs_prop_get_type(avs->env, avsmap, "_DurationNum") == AVS_PROPTYPE_UNSET)) {
+                avs->is_vfr = false;
+                avpriv_set_pts_info(st, 32, avs->vi->fps_denominator, avs->vi->fps_numerator);
+            } else {
+                avs->is_vfr = true;
+                avpriv_set_pts_info(st, 64, 1, 1000);
+            }
+        } else {
+            avs->is_vfr = false;
+            avpriv_set_pts_info(st, 32, avs->vi->fps_denominator, avs->vi->fps_numerator);
+        }
+
         avs->avs_library.avs_release_video_frame(frame);
     } else {
         st->codecpar->field_order = AV_FIELD_UNKNOWN;
@@ -737,6 +756,16 @@  static int avisynth_create_stream_video(AVFormatContext *s, AVStream *st)
         }
     }
 
+    if (avs->is_vfr == false) {
+        st->avg_frame_rate    = (AVRational) { avs->vi->fps_numerator,
+                                               avs->vi->fps_denominator };
+        st->start_time        = 0;
+        st->duration          = avs->vi->num_frames;
+        st->nb_frames         = avs->vi->num_frames;
+    } else {
+        st->start_time        = AV_NOPTS_VALUE;
+    }
+
     return 0;
 }
 
@@ -904,7 +933,9 @@  static int avisynth_read_packet_video(AVFormatContext *s, AVPacket *pkt,
     unsigned char *dst_p;
     const unsigned char *src_p;
     int n, i, plane, rowsize, planeheight, pitch, bits, ret;
+    float scaledur;
     const char *error;
+    AVRational dur;
 
     if (avs->curr_frame >= avs->vi->num_frames)
         return AVERROR_EOF;
@@ -926,12 +957,48 @@  static int avisynth_read_packet_video(AVFormatContext *s, AVPacket *pkt,
     if ((ret = av_new_packet(pkt, pkt->size)) < 0)
         return ret;
 
-    pkt->pts      = n;
-    pkt->dts      = n;
-    pkt->duration = 1;
-    pkt->stream_index = avs->curr_stream;
-
     frame = avs->avs_library.avs_get_frame(avs->clip, n);
+
+    if (avs->avs_library.avs_get_version(avs->clip) >= 9) {
+        const AVS_Map *avsmap;
+
+        avsmap = avs->avs_library.avs_get_frame_props_ro(avs->env, frame);
+
+        /* Variable frame rate */
+        if (avs->is_vfr == true) {
+            dur.num = avs->avs_library.avs_prop_get_int(avs->env, avsmap, "_DurationNum", 0, &avs->error);
+            dur.den = avs->avs_library.avs_prop_get_int(avs->env, avsmap, "_DurationDen", 0, &avs->error);
+
+            if (dur.den < 1000) {
+                // _Duration[Num/Den] uses simplified numbers rather than always using 1000 as the denominator
+                scaledur = 1000 / dur.den;
+                avs->scaled_durnum = dur.num * scaledur;
+                avs->scaled_durden = dur.den * scaledur;
+            } else {
+                avs->scaled_durnum = dur.num;
+                avs->scaled_durden = dur.den;
+            }
+
+            avs->curr_fpsnum = (dur.den * 1000) / dur.num;
+            avs->curr_fpsden = 1000;
+        }
+
+        avs->curr_duration = avs->scaled_durden * avs->scaled_durnum / 1000;
+        avs->total_duration += avs->curr_duration;
+    }
+
+    if (avs->is_vfr == false) {
+        pkt->pts      = n;
+        pkt->dts      = n;
+        pkt->duration = 1;
+        pkt->stream_index = avs->curr_stream;
+    } else {
+        pkt->pts = avs->total_duration;
+        pkt->dts = avs->total_duration - avs->curr_duration;
+        pkt->duration = avs->curr_duration;
+        pkt->stream_index = avs->curr_stream;
+    }
+
     error = avs->avs_library.avs_clip_get_error(avs->clip);
     if (error) {
         av_log(s, AV_LOG_ERROR, "%s\n", error);
@@ -1126,7 +1193,8 @@  static int avisynth_read_seek(AVFormatContext *s, int stream_index,
 
 #define AVISYNTH_FRAMEPROP_DEFAULT AVISYNTH_FRAMEPROP_FIELD_ORDER | AVISYNTH_FRAMEPROP_RANGE | \
                                    AVISYNTH_FRAMEPROP_PRIMARIES | AVISYNTH_FRAMEPROP_TRANSFER | \
-                                   AVISYNTH_FRAMEPROP_MATRIX | AVISYNTH_FRAMEPROP_CHROMA_LOCATION
+                                   AVISYNTH_FRAMEPROP_MATRIX | AVISYNTH_FRAMEPROP_CHROMA_LOCATION | \
+                                   AVISYNTH_FRAMEPROP_VFR
 #define OFFSET(x) offsetof(AviSynthContext, x)
 static const AVOption avisynth_options[] = {
     { "avisynth_flags", "set flags related to reading frame properties from script (AviSynth+ v3.7.1 or higher)", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = AVISYNTH_FRAMEPROP_DEFAULT}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM, .unit = "flags" },
@@ -1137,6 +1205,7 @@  static const AVOption avisynth_options[] = {
     { "matrix", "read matrix coefficients", 0, AV_OPT_TYPE_CONST, {.i64 = AVISYNTH_FRAMEPROP_MATRIX}, 0, 1, AV_OPT_FLAG_DECODING_PARAM, .unit = "flags" },
     { "chroma_location", "read chroma location", 0, AV_OPT_TYPE_CONST, {.i64 = AVISYNTH_FRAMEPROP_CHROMA_LOCATION}, 0, 1, AV_OPT_FLAG_DECODING_PARAM, .unit = "flags" },
     { "sar", "read sample aspect ratio", 0, AV_OPT_TYPE_CONST, {.i64 = AVISYNTH_FRAMEPROP_SAR}, 0, 1, AV_OPT_FLAG_DECODING_PARAM, .unit = "flags" },
+    { "vfr", "read fps per-frame", 0, AV_OPT_TYPE_CONST, {.i64 = AVISYNTH_FRAMEPROP_VFR}, 0, 1, AV_OPT_FLAG_DECODING_PARAM, .unit = "flags" },
     { NULL },
 };