[FFmpeg-devel,4/5] startcode: Don't return false positives

Submitted by Andreas Rheinhardt on June 9, 2019, 11 a.m.

Details

Message ID 20190609110053.4012-5-andreas.rheinhardt@gmail.com
State New
Headers show

Commit Message

Andreas Rheinhardt June 9, 2019, 11 a.m.
Until now the function ff_startcode_find_candidate_c did not really
search for startcodes (the startcode 0x00 0x00 0x01 (used in
MPEG-1/2/4, VC-1 and H.264/5) is the only startcode meant here). Instead
it searched for zero bytes and returned the earliest position of a zero
byte. This of course led to lots of false positives - millions per GB
of video.
This has been changed: The first position of the buffer that
may be part of a four-byte startcode is now returned. This excludes zero
bytes that are known not to belong to a startcode, but zero bytes at the
end of a buffer that might be part of a startcode whose second part is in
the next buffer are also returned. This is in accordance with the
expectations of the current callers of ff_startcode_find_candidate_c,
namely the H.264 parser and the VC-1 parser. This is also the reason why
"find_candidate" in the name of the function has been kept.

Getting rid of lots of function calls with their accompanying overhead
of course brings a speedup with it: Here are some benchmarks for
a 30.2 Mb/s transport stream A (10 iterations of 8192 runs each) and
another 7.4 Mb/s transport stream B (10 iterations of 131072 runs each)
on an x64 Haswell:

  |   vanilla   | aligned | current: aligned,
  | (unaligned) |  reads  | no false positives
--|-------------|---------|-------------------
A |    411578   |  424503 |    323355
B |     55476   |   58326 |     44147

vanilla refers to the state before the switch to aligned reads;
"aligned reads" refers to the state after the switch to aligned reads.

(Calls to h264_find_frame_end have been timed; given that the amount
of calls to ff_startcode_find_candidate_c has been reduced considerably,
timing them directly would be worthless.)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
 libavcodec/h264dsp.h   |   7 +--
 libavcodec/startcode.c | 101 +++++++++++++++++++++++++++++++++++------
 libavcodec/vc1dsp.h    |   6 +--
 3 files changed, 91 insertions(+), 23 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index cbea3173c6..51de7a4e21 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -108,11 +108,8 @@  typedef struct H264DSPContext {
     void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
 
     /**
-     * Search buf from the start for up to size bytes. Return the index
-     * of a zero byte, or >= size if not found. Ideally, use lookahead
-     * to filter out any zero bytes that are known to not be followed by
-     * one or more further zero bytes and a one byte. Better still, filter
-     * out any bytes that form the trailing_zero_8bits syntax element too.
+     * Search buf from the start for up to size bytes. Return the first 0x00
+     * that might be part of a (three or four) byte startcode.
      */
     int (*startcode_find_candidate)(const uint8_t *buf, int size);
 } H264DSPContext;
diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
index 8774109df5..b29c1490c0 100644
--- a/libavcodec/startcode.c
+++ b/libavcodec/startcode.c
@@ -22,35 +22,75 @@ 
  * @file
  * Accelerated start code search function for start codes common to
  * MPEG-1/2/4 video, VC-1, H.264/5
- * @author Michael Niedermayer <michaelni@gmx.at>
+ * @author Michael Niedermayer <michaelni@gmx.at>,
+ * @author Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
  */
 
 #include "startcode.h"
 #include "config.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/macros.h"
 
 int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
 {
-    const uint8_t *start = buf, *end = buf + size;
+    const uint8_t *start = buf, *end = buf + size, *temp;
 
 #define INITIALIZATION(mod) do {                                           \
-        if (mod > end - start)                                             \
+        if (end - start <= mod + 1)                                        \
             goto near_end;                                                 \
-        for (; (uintptr_t)buf % mod; buf++)                                \
-            if (!*buf)                                                     \
-                return buf - start;                                        \
+        /* From this point on until the end of the MAIN_LOOP,              \
+         * buf is the earliest possible position of a 0x00                 \
+         * immediately preceding a startcode's 0x01, i.e.                  \
+         * everything from start to buf (inclusive) is known               \
+         * to not contain a startcode's 0x01. */                           \
+        buf += 1;                                                          \
+        temp = (const uint8_t *)FFALIGN((uintptr_t)buf, mod);              \
         /* Effective end for MAIN_LOOP in order not to overread. */        \
-        end -= mod;                                                        \
+        end -= mod + 1;                                                    \
+        goto startcode_check;                                              \
     } while (0)
 
 #define READ(bitness) AV_RN ## bitness ## A
+#define STARTCODE_CHECK(offset) do {                                       \
+        if (buf[2 - offset] > 1) {                                         \
+            buf += 3;                                                      \
+        } else if (buf[1 - offset]) {                                      \
+            buf += 2;                                                      \
+        } else if (buf[-offset] || buf[2 - offset] == 0) {                 \
+            buf += 1;                                                      \
+        } else {                                                           \
+            buf += 1 - offset;                                             \
+            goto found_startcode;                                          \
+        }                                                                  \
+    } while (0)
+
+    /* The MAIN_LOOP tries to read several bytes at once.
+     * A startcode's 0x00 0x01 or 0x00 0x00 will be detected
+     * by it if these bytes are contained within the bytes
+     * read at once. */
 #define MAIN_LOOP(bitness, mask1, mask2) do {                              \
-        for (; buf <= end; buf += bitness / 8)                             \
-            if ((~READ(bitness)(buf) & (READ(bitness)(buf) - mask1))       \
-                                     & mask2)                              \
-                break;                                                     \
+        for (; buf < end; ) {                                              \
+            if (!((~READ(bitness)(buf) & (READ(bitness)(buf) - mask1))     \
+                                       &  mask2)) {                        \
+                buf += bitness / 8;                                        \
+                continue;                                                  \
+            }                                                              \
+            temp = buf + bitness / 8;                                      \
+            /* If the 0x01 of a startcode is at position buf + 1,          \
+             * the following check detects it.                             \
+             * Because buf got incremented before entering this            \
+             * loop, buf - 1 may be evaluated.                             \
+             * Because temp + 1 < start + size, buf is always              \
+             * <= the end of the buffer during the closing check           \
+             * so that the pointer comparison is defined. */               \
+        startcode_check:                                                   \
+            do {                                                           \
+                STARTCODE_CHECK(1);                                        \
+            } while (buf < temp);                                          \
+            buf = temp;                                                    \
+        }                                                                  \
         /* Revert to the real end. */                                      \
-        end += bitness / 8;                                                \
+        end += bitness / 8 + 1;                                            \
     } while (0)
 
 #if HAVE_FAST_64BIT
@@ -61,9 +101,42 @@  int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
     MAIN_LOOP(32, 0x01010101U, 0x80808080U);
 #endif
 
+    /* For the end, buf is the earliest possible position
+     * of a three-byte startcode's leading 0x00. This is
+     * done in order not to run into the undefined behaviour
+     * explained below. */
+    buf--;
 near_end:
-    for (; buf < end; buf++)
-        if (!*buf)
+    while (2 < end - buf) {
+        /* The STARTCODE_CHECK in the MAIN_LOOP can't be used
+         * to check whether the file ends with a startcode,
+         * because for this buf would need to be end - 2.
+         * But depending on the value of end[-1], buf might get
+         * incremented by 3 and therefore be beyond end. But
+         * pointer arithmetic beyond end is undefined behaviour.
+         * So a STARTCODE_CHECK with a different offset is used
+         * here: It detects whether buf points to the first 0x00
+         * of a three-byte startcode. */
+        STARTCODE_CHECK(0);
+    }
+
+    /* No startcode found, but if the last bytes vanish,
+     * they may be part of a startcode whose end is not
+     * in the current buffer. Return the offset of the
+     * earliest element that may belong to a startcode. */
+    for (buf = end; buf > (end - start > 3 ? end - 3 : start); buf--)
+        if (buf[-1])
             break;
+
+    return buf - start;
+
+found_startcode:
+    /* buf points to the byte preceding a startcode's 0x01.
+     * Check whether it is a four-byte startcode. */
+
+    buf -= 1;
+    if (buf > start && buf[-1] == 0)
+        buf--;
+
     return buf - start;
 }
diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index 75db62b1b4..be9468add3 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h
@@ -74,10 +74,8 @@  typedef struct VC1DSPContext {
                                      int alpha, int width);
 
     /**
-     * Search buf from the start for up to size bytes. Return the index
-     * of a zero byte, or >= size if not found. Ideally, use lookahead
-     * to filter out any zero bytes that are known to not be followed by
-     * one or more further zero bytes and a one byte.
+     * Search buf from the start for up to size bytes. Return the first 0x00
+     * that might be part of a (three or four) byte startcode.
      */
     int (*startcode_find_candidate)(const uint8_t *buf, int size);
 } VC1DSPContext;