diff mbox series

[FFmpeg-devel] avformat/matroskadec: Prevent expensive get_cue_desc lookups

Message ID 20230330200714.36221-1-tom@jwplayer.com
State New
Headers show
Series [FFmpeg-devel] avformat/matroskadec: Prevent expensive get_cue_desc lookups | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Tom Boshoven March 30, 2023, 8:07 p.m. UTC
Due to the way the bandwidth estimation code works, the get_cue_desc
function was called many times over.
This function did a from-scratch lookup using a timestamp.
This lookup was done using linear iteration (O(n)), resulting in
significant overhead when many cues exist.

This change uses ff_index_search_timestamp (binary search, O(log n))
for the lookup.
Additionally, the lookup is prevented entirely in most cases by
maintaining the indexes of the cues (O(1)).

Signed-off-by: Tom Boshoven <tom@jwplayer.com>
---
 libavformat/matroskadec.c | 64 +++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 26 deletions(-)

Comments

Tom Boshoven Aug. 1, 2023, 8:44 p.m. UTC | #1
On Thu, Mar 30, 2023 at 4:07 PM Tom Boshoven <tom@jwplayer.com> wrote:
>
> Due to the way the bandwidth estimation code works, the get_cue_desc
> function was called many times over.
> This function did a from-scratch lookup using a timestamp.
> This lookup was done using linear iteration (O(n)), resulting in
> significant overhead when many cues exist.
>
> This change uses ff_index_search_timestamp (binary search, O(log n))
> for the lookup.
> Additionally, the lookup is prevented entirely in most cases by
> maintaining the indexes of the cues (O(1)).
>
> Signed-off-by: Tom Boshoven <tom@jwplayer.com>
> ---
>  libavformat/matroskadec.c | 64 +++++++++++++++++++++++----------------
>  1 file changed, 38 insertions(+), 26 deletions(-)
>
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 3a888e3ada..d974944945 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -4038,35 +4038,45 @@ typedef struct {
>      int64_t end_offset;
>  } CueDesc;
>
> -/* This function searches all the Cues and returns the CueDesc corresponding to
> - * the timestamp ts. Returned CueDesc will be such that start_time_ns <= ts <
> - * end_time_ns. All 4 fields will be set to -1 if ts >= file's duration or
> - * if an error occurred.
> +/**
> + * Find the index of the cue corresponding to the timestamp ts.
>   */
> -static CueDesc get_cue_desc(AVFormatContext *s, int64_t ts, int64_t cues_start) {
> +static int get_cue_index(AVFormatContext *s, int64_t ts) {
> +    MatroskaDemuxContext *matroska = s->priv_data;
> +    FFStream *const sti = ffstream(s->streams[0]);
> +
> +    if (ts >= (int64_t)(matroska->duration * matroska->time_scale))
> +        return -1;
> +
> +    // Look up the index entry corresponding to the timestamp (dividing by timescale)
> +    return ff_index_search_timestamp(
> +        sti->index_entries,
> +        sti->nb_index_entries,
> +        ts / matroska->time_scale,
> +        AVSEEK_FLAG_ANY | AVSEEK_FLAG_BACKWARD
> +    );
> +}
> +
> +/**
> + * Get the CueDesc for a specific index.
> + * For a given timestamp, the index can be obtained using get_cue_index.
> + */
> +static CueDesc get_cue_desc_from_index(AVFormatContext *s, int idx, int64_t cues_start) {
>      MatroskaDemuxContext *matroska = s->priv_data;
>      FFStream *const sti = ffstream(s->streams[0]);
>      AVIndexEntry *const index_entries = sti->index_entries;
>      int nb_index_entries = sti->nb_index_entries;
>      CueDesc cue_desc;
> -    int i;
>
> -    if (ts >= (int64_t)(matroska->duration * matroska->time_scale))
> +    if (idx < 0 || idx >= nb_index_entries) {
>          return (CueDesc) {-1, -1, -1, -1};
> -    for (i = 1; i < nb_index_entries; i++) {
> -        if (index_entries[i - 1].timestamp * matroska->time_scale <= ts &&
> -            index_entries[i].timestamp * matroska->time_scale > ts) {
> -            break;
> -        }
>      }
> -    --i;
> -    if (index_entries[i].timestamp > matroska->duration)
> -        return (CueDesc) {-1, -1, -1, -1};
> -    cue_desc.start_time_ns = index_entries[i].timestamp * matroska->time_scale;
> -    cue_desc.start_offset = index_entries[i].pos - matroska->segment_start;
> -    if (i != nb_index_entries - 1) {
> -        cue_desc.end_time_ns = index_entries[i + 1].timestamp * matroska->time_scale;
> -        cue_desc.end_offset = index_entries[i + 1].pos - matroska->segment_start;
> +
> +    cue_desc.start_time_ns = index_entries[idx].timestamp * matroska->time_scale;
> +    cue_desc.start_offset = index_entries[idx].pos - matroska->segment_start;
> +    if (idx != nb_index_entries - 1) {
> +        cue_desc.end_time_ns = index_entries[idx + 1].timestamp * matroska->time_scale;
> +        cue_desc.end_offset = index_entries[idx + 1].pos - matroska->segment_start;
>      } else {
>          cue_desc.end_time_ns = matroska->duration * matroska->time_scale;
>          // FIXME: this needs special handling for files where Cues appear
> @@ -4140,7 +4150,8 @@ static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
>      int64_t time_to_search_ns = (int64_t)(search_sec * nano_seconds_per_second);
>      int64_t end_time_ns = time_ns + time_to_search_ns;
>      double sec_downloaded = 0.0;
> -    CueDesc desc_curr = get_cue_desc(s, time_ns, cues_start);
> +    int cue_idx = get_cue_index(s, time_ns);
> +    CueDesc desc_curr = get_cue_desc_from_index(s, cue_idx, cues_start);
>      if (desc_curr.start_time_ns == -1)
>        return -1;
>      *sec_to_download = 0.0;
> @@ -4168,7 +4179,7 @@ static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
>        }
>
>        // Get the next Cue.
> -      desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
> +      desc_curr = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>      }
>
>      while (desc_curr.start_time_ns != -1) {
> @@ -4197,7 +4208,7 @@ static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
>              break;
>          }
>
> -        desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
> +        desc_curr = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>      }
>      *buffer = *buffer + sec_downloaded;
>      return rv;
> @@ -4226,7 +4237,8 @@ static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
>          int64_t temp_prebuffer_ns = prebuffer_ns;
>          int64_t pre_bytes, pre_ns;
>          double pre_sec, prebuffer, bits_per_second;
> -        CueDesc desc_beg = get_cue_desc(s, time_ns, cues_start);
> +        int cue_idx = get_cue_index(s, time_ns);
> +        CueDesc desc_beg = get_cue_desc_from_index(s, cue_idx, cues_start);
>
>          // Start with the first Cue.
>          CueDesc desc_end = desc_beg;
> @@ -4237,7 +4249,7 @@ static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
>              // Prebuffered the entire Cue.
>              prebuffer_bytes += desc_end.end_offset - desc_end.start_offset;
>              temp_prebuffer_ns -= desc_end.end_time_ns - desc_end.start_time_ns;
> -            desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
> +            desc_end = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>          }
>          if (desc_end.start_time_ns == -1) {
>              // The prebuffer is larger than the duration.
> @@ -4295,7 +4307,7 @@ static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
>                      }
>                  }
>
> -                desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
> +                desc_end = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>              } while (desc_end.start_time_ns != -1);
>          }
>          if (bandwidth < bits_per_second) bandwidth = bits_per_second;
> --
> 2.40.0
>

Is there any interest in this performance optimization?
I'd be happy to bring it up to date and make changes if needed.
Tom Boshoven Nov. 29, 2023, 7:35 p.m. UTC | #2
On Thu, Mar 30, 2023 at 4:07 PM Tom Boshoven <tom@jwplayer.com> wrote:
>
> Due to the way the bandwidth estimation code works, the get_cue_desc
> function was called many times over.
> This function did a from-scratch lookup using a timestamp.
> This lookup was done using linear iteration (O(n)), resulting in
> significant overhead when many cues exist.
>
> This change uses ff_index_search_timestamp (binary search, O(log n))
> for the lookup.
> Additionally, the lookup is prevented entirely in most cases by
> maintaining the indexes of the cues (O(1)).
>
> Signed-off-by: Tom Boshoven <tom@jwplayer.com>
> ---
>  libavformat/matroskadec.c | 64 +++++++++++++++++++++++----------------
>  1 file changed, 38 insertions(+), 26 deletions(-)
>
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 3a888e3ada..d974944945 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -4038,35 +4038,45 @@ typedef struct {
>      int64_t end_offset;
>  } CueDesc;
>
> -/* This function searches all the Cues and returns the CueDesc corresponding to
> - * the timestamp ts. Returned CueDesc will be such that start_time_ns <= ts <
> - * end_time_ns. All 4 fields will be set to -1 if ts >= file's duration or
> - * if an error occurred.
> +/**
> + * Find the index of the cue corresponding to the timestamp ts.
>   */
> -static CueDesc get_cue_desc(AVFormatContext *s, int64_t ts, int64_t cues_start) {
> +static int get_cue_index(AVFormatContext *s, int64_t ts) {
> +    MatroskaDemuxContext *matroska = s->priv_data;
> +    FFStream *const sti = ffstream(s->streams[0]);
> +
> +    if (ts >= (int64_t)(matroska->duration * matroska->time_scale))
> +        return -1;
> +
> +    // Look up the index entry corresponding to the timestamp (dividing by timescale)
> +    return ff_index_search_timestamp(
> +        sti->index_entries,
> +        sti->nb_index_entries,
> +        ts / matroska->time_scale,
> +        AVSEEK_FLAG_ANY | AVSEEK_FLAG_BACKWARD
> +    );
> +}
> +
> +/**
> + * Get the CueDesc for a specific index.
> + * For a given timestamp, the index can be obtained using get_cue_index.
> + */
> +static CueDesc get_cue_desc_from_index(AVFormatContext *s, int idx, int64_t cues_start) {
>      MatroskaDemuxContext *matroska = s->priv_data;
>      FFStream *const sti = ffstream(s->streams[0]);
>      AVIndexEntry *const index_entries = sti->index_entries;
>      int nb_index_entries = sti->nb_index_entries;
>      CueDesc cue_desc;
> -    int i;
>
> -    if (ts >= (int64_t)(matroska->duration * matroska->time_scale))
> +    if (idx < 0 || idx >= nb_index_entries) {
>          return (CueDesc) {-1, -1, -1, -1};
> -    for (i = 1; i < nb_index_entries; i++) {
> -        if (index_entries[i - 1].timestamp * matroska->time_scale <= ts &&
> -            index_entries[i].timestamp * matroska->time_scale > ts) {
> -            break;
> -        }
>      }
> -    --i;
> -    if (index_entries[i].timestamp > matroska->duration)
> -        return (CueDesc) {-1, -1, -1, -1};
> -    cue_desc.start_time_ns = index_entries[i].timestamp * matroska->time_scale;
> -    cue_desc.start_offset = index_entries[i].pos - matroska->segment_start;
> -    if (i != nb_index_entries - 1) {
> -        cue_desc.end_time_ns = index_entries[i + 1].timestamp * matroska->time_scale;
> -        cue_desc.end_offset = index_entries[i + 1].pos - matroska->segment_start;
> +
> +    cue_desc.start_time_ns = index_entries[idx].timestamp * matroska->time_scale;
> +    cue_desc.start_offset = index_entries[idx].pos - matroska->segment_start;
> +    if (idx != nb_index_entries - 1) {
> +        cue_desc.end_time_ns = index_entries[idx + 1].timestamp * matroska->time_scale;
> +        cue_desc.end_offset = index_entries[idx + 1].pos - matroska->segment_start;
>      } else {
>          cue_desc.end_time_ns = matroska->duration * matroska->time_scale;
>          // FIXME: this needs special handling for files where Cues appear
> @@ -4140,7 +4150,8 @@ static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
>      int64_t time_to_search_ns = (int64_t)(search_sec * nano_seconds_per_second);
>      int64_t end_time_ns = time_ns + time_to_search_ns;
>      double sec_downloaded = 0.0;
> -    CueDesc desc_curr = get_cue_desc(s, time_ns, cues_start);
> +    int cue_idx = get_cue_index(s, time_ns);
> +    CueDesc desc_curr = get_cue_desc_from_index(s, cue_idx, cues_start);
>      if (desc_curr.start_time_ns == -1)
>        return -1;
>      *sec_to_download = 0.0;
> @@ -4168,7 +4179,7 @@ static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
>        }
>
>        // Get the next Cue.
> -      desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
> +      desc_curr = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>      }
>
>      while (desc_curr.start_time_ns != -1) {
> @@ -4197,7 +4208,7 @@ static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
>              break;
>          }
>
> -        desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
> +        desc_curr = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>      }
>      *buffer = *buffer + sec_downloaded;
>      return rv;
> @@ -4226,7 +4237,8 @@ static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
>          int64_t temp_prebuffer_ns = prebuffer_ns;
>          int64_t pre_bytes, pre_ns;
>          double pre_sec, prebuffer, bits_per_second;
> -        CueDesc desc_beg = get_cue_desc(s, time_ns, cues_start);
> +        int cue_idx = get_cue_index(s, time_ns);
> +        CueDesc desc_beg = get_cue_desc_from_index(s, cue_idx, cues_start);
>
>          // Start with the first Cue.
>          CueDesc desc_end = desc_beg;
> @@ -4237,7 +4249,7 @@ static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
>              // Prebuffered the entire Cue.
>              prebuffer_bytes += desc_end.end_offset - desc_end.start_offset;
>              temp_prebuffer_ns -= desc_end.end_time_ns - desc_end.start_time_ns;
> -            desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
> +            desc_end = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>          }
>          if (desc_end.start_time_ns == -1) {
>              // The prebuffer is larger than the duration.
> @@ -4295,7 +4307,7 @@ static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
>                      }
>                  }
>
> -                desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
> +                desc_end = get_cue_desc_from_index(s, ++cue_idx, cues_start);
>              } while (desc_end.start_time_ns != -1);
>          }
>          if (bandwidth < bits_per_second) bandwidth = bits_per_second;
> --
> 2.40.0
>

Is there any interest in this matroskadec performance patch I proposed
earlier this year?
Happy to bring it up to date.
diff mbox series

Patch

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 3a888e3ada..d974944945 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -4038,35 +4038,45 @@  typedef struct {
     int64_t end_offset;
 } CueDesc;
 
-/* This function searches all the Cues and returns the CueDesc corresponding to
- * the timestamp ts. Returned CueDesc will be such that start_time_ns <= ts <
- * end_time_ns. All 4 fields will be set to -1 if ts >= file's duration or
- * if an error occurred.
+/**
+ * Find the index of the cue corresponding to the timestamp ts.
  */
-static CueDesc get_cue_desc(AVFormatContext *s, int64_t ts, int64_t cues_start) {
+static int get_cue_index(AVFormatContext *s, int64_t ts) {
+    MatroskaDemuxContext *matroska = s->priv_data;
+    FFStream *const sti = ffstream(s->streams[0]);
+
+    if (ts >= (int64_t)(matroska->duration * matroska->time_scale))
+        return -1;
+
+    // Look up the index entry corresponding to the timestamp (dividing by timescale)
+    return ff_index_search_timestamp(
+        sti->index_entries,
+        sti->nb_index_entries,
+        ts / matroska->time_scale,
+        AVSEEK_FLAG_ANY | AVSEEK_FLAG_BACKWARD
+    );
+}
+
+/**
+ * Get the CueDesc for a specific index.
+ * For a given timestamp, the index can be obtained using get_cue_index.
+ */
+static CueDesc get_cue_desc_from_index(AVFormatContext *s, int idx, int64_t cues_start) {
     MatroskaDemuxContext *matroska = s->priv_data;
     FFStream *const sti = ffstream(s->streams[0]);
     AVIndexEntry *const index_entries = sti->index_entries;
     int nb_index_entries = sti->nb_index_entries;
     CueDesc cue_desc;
-    int i;
 
-    if (ts >= (int64_t)(matroska->duration * matroska->time_scale))
+    if (idx < 0 || idx >= nb_index_entries) {
         return (CueDesc) {-1, -1, -1, -1};
-    for (i = 1; i < nb_index_entries; i++) {
-        if (index_entries[i - 1].timestamp * matroska->time_scale <= ts &&
-            index_entries[i].timestamp * matroska->time_scale > ts) {
-            break;
-        }
     }
-    --i;
-    if (index_entries[i].timestamp > matroska->duration)
-        return (CueDesc) {-1, -1, -1, -1};
-    cue_desc.start_time_ns = index_entries[i].timestamp * matroska->time_scale;
-    cue_desc.start_offset = index_entries[i].pos - matroska->segment_start;
-    if (i != nb_index_entries - 1) {
-        cue_desc.end_time_ns = index_entries[i + 1].timestamp * matroska->time_scale;
-        cue_desc.end_offset = index_entries[i + 1].pos - matroska->segment_start;
+
+    cue_desc.start_time_ns = index_entries[idx].timestamp * matroska->time_scale;
+    cue_desc.start_offset = index_entries[idx].pos - matroska->segment_start;
+    if (idx != nb_index_entries - 1) {
+        cue_desc.end_time_ns = index_entries[idx + 1].timestamp * matroska->time_scale;
+        cue_desc.end_offset = index_entries[idx + 1].pos - matroska->segment_start;
     } else {
         cue_desc.end_time_ns = matroska->duration * matroska->time_scale;
         // FIXME: this needs special handling for files where Cues appear
@@ -4140,7 +4150,8 @@  static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
     int64_t time_to_search_ns = (int64_t)(search_sec * nano_seconds_per_second);
     int64_t end_time_ns = time_ns + time_to_search_ns;
     double sec_downloaded = 0.0;
-    CueDesc desc_curr = get_cue_desc(s, time_ns, cues_start);
+    int cue_idx = get_cue_index(s, time_ns);
+    CueDesc desc_curr = get_cue_desc_from_index(s, cue_idx, cues_start);
     if (desc_curr.start_time_ns == -1)
       return -1;
     *sec_to_download = 0.0;
@@ -4168,7 +4179,7 @@  static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
       }
 
       // Get the next Cue.
-      desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
+      desc_curr = get_cue_desc_from_index(s, ++cue_idx, cues_start);
     }
 
     while (desc_curr.start_time_ns != -1) {
@@ -4197,7 +4208,7 @@  static int buffer_size_after_time_downloaded(int64_t time_ns, double search_sec,
             break;
         }
 
-        desc_curr = get_cue_desc(s, desc_curr.end_time_ns, cues_start);
+        desc_curr = get_cue_desc_from_index(s, ++cue_idx, cues_start);
     }
     *buffer = *buffer + sec_downloaded;
     return rv;
@@ -4226,7 +4237,8 @@  static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
         int64_t temp_prebuffer_ns = prebuffer_ns;
         int64_t pre_bytes, pre_ns;
         double pre_sec, prebuffer, bits_per_second;
-        CueDesc desc_beg = get_cue_desc(s, time_ns, cues_start);
+        int cue_idx = get_cue_index(s, time_ns);
+        CueDesc desc_beg = get_cue_desc_from_index(s, cue_idx, cues_start);
 
         // Start with the first Cue.
         CueDesc desc_end = desc_beg;
@@ -4237,7 +4249,7 @@  static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
             // Prebuffered the entire Cue.
             prebuffer_bytes += desc_end.end_offset - desc_end.start_offset;
             temp_prebuffer_ns -= desc_end.end_time_ns - desc_end.start_time_ns;
-            desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
+            desc_end = get_cue_desc_from_index(s, ++cue_idx, cues_start);
         }
         if (desc_end.start_time_ns == -1) {
             // The prebuffer is larger than the duration.
@@ -4295,7 +4307,7 @@  static int64_t webm_dash_manifest_compute_bandwidth(AVFormatContext *s, int64_t
                     }
                 }
 
-                desc_end = get_cue_desc(s, desc_end.end_time_ns, cues_start);
+                desc_end = get_cue_desc_from_index(s, ++cue_idx, cues_start);
             } while (desc_end.start_time_ns != -1);
         }
         if (bandwidth < bits_per_second) bandwidth = bits_per_second;