diff mbox

[FFmpeg-devel,1/5] startcode: Use common macro and switch to pointer arithmetic

Message ID 20190609110053.4012-2-andreas.rheinhardt@gmail.com
State New
Headers show

Commit Message

Andreas Rheinhardt June 9, 2019, 11 a.m. UTC
The reasons are cosmetics and preparation for future patches that will
have even more cases and whose performance improves when switching to
direct pointer arithmetic: Benchmarks have shown that using pointers
directly instead of indexing to access the array is about 5% faster
(33665 vs. 31806 for a 7.4 Mb/s H.264 file based on 10 iterations of
131072 runs each; and 244356 vs 233373 for a 30.2 Mb/s H.264 file based
on 10 iterations with 8192 runs each).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
 libavcodec/startcode.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)
diff mbox

Patch

diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
index 9efdffe8c6..a55a8fafa6 100644
--- a/libavcodec/startcode.c
+++ b/libavcodec/startcode.c
@@ -27,31 +27,32 @@ 
 
 #include "startcode.h"
 #include "config.h"
+#include "libavutil/intreadwrite.h"
 
 int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
 {
-    int i = 0;
+    const uint8_t *start = buf, *end = buf + size;
+
 #if HAVE_FAST_UNALIGNED
-    /* we check i < size instead of i + 3 / 7 because it is
-     * simpler and there must be AV_INPUT_BUFFER_PADDING_SIZE
-     * bytes at the end.
-     */
+#define READ(bitness) AV_RN ## bitness
+#define MAIN_LOOP(bitness, mask1, mask2) do {                              \
+        /* we check buf < end instead of buf + 3 / 7 because it is
+         * simpler and there must be AV_INPUT_BUFFER_PADDING_SIZE
+         * bytes at the end. */                                            \
+        for (; buf < end; buf += bitness / 8)                              \
+            if ((~READ(bitness)(buf) & (READ(bitness)(buf) - mask1))       \
+                                     & mask2)                              \
+                break;                                                     \
+    } while (0)
+
 #if HAVE_FAST_64BIT
-    while (i < size &&
-            !((~*(const uint64_t *)(buf + i) &
-                    (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
-                    0x8080808080808080ULL))
-        i += 8;
+    MAIN_LOOP(64, 0x0101010101010101ULL, 0x8080808080808080ULL);
 #else
-    while (i < size &&
-            !((~*(const uint32_t *)(buf + i) &
-                    (*(const uint32_t *)(buf + i) - 0x01010101U)) &
-                    0x80808080U))
-        i += 4;
+    MAIN_LOOP(32, 0x01010101U, 0x80808080U);
 #endif
 #endif
-    for (; i < size; i++)
-        if (!buf[i])
+    for (; buf < end; buf++)
+        if (!*buf)
             break;
-    return i;
+    return buf - start;
 }